Text Generation
Transformers
Safetensors
llama
research
code
mathematics
reasoning
multilingual
long-context
custom_code
text-generation-inference
Instructions to use DeepXR/Helion-V2.5-Rnd with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DeepXR/Helion-V2.5-Rnd with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DeepXR/Helion-V2.5-Rnd with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DeepXR/Helion-V2.5-Rnd"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DeepXR/Helion-V2.5-Rnd",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

Use Docker
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
- SGLang
How to use DeepXR/Helion-V2.5-Rnd with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "DeepXR/Helion-V2.5-Rnd" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DeepXR/Helion-V2.5-Rnd",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "DeepXR/Helion-V2.5-Rnd" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DeepXR/Helion-V2.5-Rnd",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

- Docker Model Runner
How to use DeepXR/Helion-V2.5-Rnd with Docker Model Runner:
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
| { | |
| "model_family": "Helion", | |
| "version": "2.5", | |
| "release_type": "research_and_development", | |
| "variants": { | |
| "base": { | |
| "name": "Helion-2.5-Rnd", | |
| "full_name": "DeepXR/Helion-2.5-Rnd", | |
| "description": "Base research model with full precision (FP16)", | |
| "parameters": "70B", | |
| "precision": "float16", | |
| "context_length": 131072, | |
| "safetensors_shards": 83, | |
| "shard_naming": "shard_00 to shard_82", | |
| "shard_size_gb": 1.69, | |
| "shard_size_gib": 1.57, | |
| "total_size_gb": 140.27, | |
| "status": "active", | |
| "recommended_use": [ | |
| "Research", | |
| "Development", | |
| "High-accuracy inference" | |
| ], | |
| "hardware_requirements": { | |
| "min_vram_gb": 145, | |
| "min_gpus": 2, | |
| "recommended_gpu": "A100 80GB" | |
| } | |
| }, | |
| "instruct": { | |
| "name": "Helion-2.5-Rnd-Instruct", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Instruct", | |
| "description": "Instruction-tuned variant optimized for following instructions", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Instruction following", | |
| "Task completion", | |
| "Structured outputs" | |
| ], | |
| "fine_tuning": { | |
| "type": "supervised", | |
| "data_focus": "instruction_pairs" | |
| } | |
| }, | |
| "chat": { | |
| "name": "Helion-2.5-Rnd-Chat", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Chat", | |
| "description": "Conversational variant optimized for multi-turn dialogue", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Conversational AI", | |
| "Customer service", | |
| "Interactive applications" | |
| ], | |
| "fine_tuning": { | |
| "type": "rlhf", | |
| "data_focus": "conversational_data" | |
| } | |
| }, | |
| "code": { | |
| "name": "Helion-2.5-Rnd-Code", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Code", | |
| "description": "Code-specialized variant with enhanced programming capabilities", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Code generation", | |
| "Code review", | |
| "Bug fixing", | |
| "Documentation" | |
| ], | |
| "fine_tuning": { | |
| "type": "supervised", | |
| "data_focus": "code_repositories" | |
| }, | |
| "enhanced_languages": [ | |
| "Python", | |
| "JavaScript", | |
| "TypeScript", | |
| "Rust", | |
| "Go", | |
| "Java" | |
| ] | |
| }, | |
| "math": { | |
| "name": "Helion-2.5-Rnd-Math", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Math", | |
| "description": "Mathematics-specialized variant for advanced problem solving", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Mathematical reasoning", | |
| "Proof generation", | |
| "Problem solving", | |
| "Educational applications" | |
| ], | |
| "fine_tuning": { | |
| "type": "supervised", | |
| "data_focus": "mathematical_proofs" | |
| } | |
| } | |
| }, | |
| "deployment_configurations": { | |
| "production": { | |
| "description": "Production-ready configuration with optimizations", | |
| "settings": { | |
| "tensor_parallel_size": 4, | |
| "gpu_memory_utilization": 0.95, | |
| "max_batch_size": 32, | |
| "enable_prefix_caching": true, | |
| "enable_chunked_prefill": true | |
| } | |
| }, | |
| "development": { | |
| "description": "Development configuration for testing", | |
| "settings": { | |
| "tensor_parallel_size": 2, | |
| "gpu_memory_utilization": 0.85, | |
| "max_batch_size": 8, | |
| "enable_prefix_caching": false, | |
| "enable_chunked_prefill": false | |
| } | |
| }, | |
| "research": { | |
| "description": "Research configuration for experimentation", | |
| "settings": { | |
| "tensor_parallel_size": 2, | |
| "gpu_memory_utilization": 0.90, | |
| "max_batch_size": 4, | |
| "enable_prefix_caching": false, | |
| "enable_chunked_prefill": false, | |
| "enable_logging": true | |
| } | |
| } | |
| }, | |
| "comparison_matrix": { | |
| "base_vs_instruct": { | |
| "base_advantages": [ | |
| "More flexible for fine-tuning", | |
| "Better for creative tasks", | |
| "Less constrained outputs" | |
| ], | |
| "instruct_advantages": [ | |
| "Better instruction following", | |
| "More structured outputs", | |
| "Improved task completion" | |
| ] | |
| }, | |
| "base_vs_chat": { | |
| "base_advantages": [ | |
| "Better for single-turn tasks", | |
| "More diverse outputs", | |
| "Flexible formatting" | |
| ], | |
| "chat_advantages": [ | |
| "Better conversation coherence", | |
| "Improved context awareness", | |
| "Natural dialogue flow" | |
| ] | |
| } | |
| }, | |
| "migration_guide": { | |
| "from_base_to_instruct": { | |
| "steps": [ | |
| "Update prompt format to instruction style", | |
| "Adjust temperature (typically lower)", | |
| "Add explicit task descriptions", | |
| "Use structured output formats" | |
| ], | |
| "example_prompt_change": { | |
| "base": "Write a function to sort a list", | |
| "instruct": "### Instruction:\nWrite a Python function that sorts a list in ascending order.\n\n### Response:" | |
| } | |
| }, | |
| "from_base_to_chat": { | |
| "steps": [ | |
| "Convert to chat message format", | |
| "Add system prompts", | |
| "Maintain conversation history", | |
| "Use appropriate message roles" | |
| ], | |
| "example_format_change": { | |
| "base": "Hello, how are you?", | |
| "chat": [ | |
| { | |
| "role": "system", | |
| "content": "You are a helpful assistant." | |
| }, | |
| { | |
| "role": "user", | |
| "content": "Hello, how are you?" | |
| } | |
| ] | |
| } | |
| } | |
| }, | |
| "version_history": { | |
| "2.5.0-rnd": { | |
| "release_date": "2025-01-30", | |
| "status": "current", | |
| "changes": [ | |
| "Initial research release", | |
| "70B parameter model", | |
| "131K context with YARN", | |
| "SafeTensors format (83 shards)", | |
| "Full precision (FP16)" | |
| ] | |
| } | |
| }, | |
| "roadmap": { | |
| "upcoming_variants": [ | |
| { | |
| "name": "Helion-2.5-Rnd-Instruct", | |
| "expected": "Q2 2025", | |
| "status": "in_development" | |
| }, | |
| { | |
| "name": "Helion-2.5-Rnd-Chat", | |
| "expected": "Q2 2025", | |
| "status": "planned" | |
| }, | |
| { | |
| "name": "Helion-2.5-Rnd-Code", | |
| "expected": "Q3 2025", | |
| "status": "planned" | |
| } | |
| ], | |
| "future_features": [ | |
| "Multi-modal capabilities", | |
| "Extended context to 256K", | |
| "Improved multilingual support", | |
| "Domain-specific variants" | |
| ] | |
| } | |
| } |