Text Generation
Transformers
Safetensors
llama
research
code
mathematics
reasoning
multilingual
long-context
custom_code
text-generation-inference
Instructions to use DeepXR/Helion-V2.5-Rnd with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DeepXR/Helion-V2.5-Rnd with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("DeepXR/Helion-V2.5-Rnd", trust_remote_code=True)
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DeepXR/Helion-V2.5-Rnd with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DeepXR/Helion-V2.5-Rnd"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DeepXR/Helion-V2.5-Rnd",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

Use Docker
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
- SGLang
How to use DeepXR/Helion-V2.5-Rnd with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "DeepXR/Helion-V2.5-Rnd" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DeepXR/Helion-V2.5-Rnd",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "DeepXR/Helion-V2.5-Rnd" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DeepXR/Helion-V2.5-Rnd",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

- Docker Model Runner
How to use DeepXR/Helion-V2.5-Rnd with Docker Model Runner:
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
| { | |
| "model_family": "Helion", | |
| "version": "2.5", | |
| "release_type": "research_and_development", | |
| "variants": { | |
| "base": { | |
| "name": "Helion-2.5-Rnd", | |
| "full_name": "DeepXR/Helion-2.5-Rnd", | |
| "description": "Base research model with full precision (FP16)", | |
| "parameters": "70B", | |
| "precision": "float16", | |
| "context_length": 131072, | |
| "safetensors_shards": 83, | |
| "shard_naming": "shard_00 to shard_82", | |
| "shard_size_gb": 1.69, | |
| "shard_size_gib": 1.57, | |
| "total_size_gb": 140.27, | |
| "status": "active", | |
| "recommended_use": [ | |
| "Research", | |
| "Development", | |
| "High-accuracy inference" | |
| ], | |
| "hardware_requirements": { | |
| "min_vram_gb": 145, | |
| "min_gpus": 2, | |
| "recommended_gpu": "A100 80GB" | |
| } | |
| }, | |
| "instruct": { | |
| "name": "Helion-2.5-Rnd-Instruct", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Instruct", | |
| "description": "Instruction-tuned variant optimized for following instructions", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Instruction following", | |
| "Task completion", | |
| "Structured outputs" | |
| ], | |
| "fine_tuning": { | |
| "type": "supervised", | |
| "data_focus": "instruction_pairs" | |
| } | |
| }, | |
| "chat": { | |
| "name": "Helion-2.5-Rnd-Chat", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Chat", | |
| "description": "Conversational variant optimized for multi-turn dialogue", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Conversational AI", | |
| "Customer service", | |
| "Interactive applications" | |
| ], | |
| "fine_tuning": { | |
| "type": "rlhf", | |
| "data_focus": "conversational_data" | |
| } | |
| }, | |
| "code": { | |
| "name": "Helion-2.5-Rnd-Code", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Code", | |
| "description": "Code-specialized variant with enhanced programming capabilities", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Code generation", | |
| "Code review", | |
| "Bug fixing", | |
| "Documentation" | |
| ], | |
| "fine_tuning": { | |
| "type": "supervised", | |
| "data_focus": "code_repositories" | |
| }, | |
| "enhanced_languages": [ | |
| "Python", | |
| "JavaScript", | |
| "TypeScript", | |
| "Rust", | |
| "Go", | |
| "Java" | |
| ] | |
| }, | |
| "math": { | |
| "name": "Helion-2.5-Rnd-Math", | |
| "full_name": "DeepXR/Helion-2.5-Rnd-Math", | |
| "description": "Mathematics-specialized variant for advanced problem solving", | |
| "parameters": "70B", | |
| "precision": "bfloat16", | |
| "context_length": 131072, | |
| "status": "planned", | |
| "recommended_use": [ | |
| "Mathematical reasoning", | |
| "Proof generation", | |
| "Problem solving", | |
| "Educational applications" | |
| ], | |
| "fine_tuning": { | |
| "type": "supervised", | |
| "data_focus": "mathematical_proofs" | |
| } | |
| } | |
| }, | |
| "deployment_configurations": { | |
| "production": { | |
| "description": "Production-ready configuration with optimizations", | |
| "settings": { | |
| "tensor_parallel_size": 4, | |
| "gpu_memory_utilization": 0.95, | |
| "max_batch_size": 32, | |
| "enable_prefix_caching": true, | |
| "enable_chunked_prefill": true | |
| } | |
| }, | |
| "development": { | |
| "description": "Development configuration for testing", | |
| "settings": { | |
| "tensor_parallel_size": 2, | |
| "gpu_memory_utilization": 0.85, | |
| "max_batch_size": 8, | |
| "enable_prefix_caching": false, | |
| "enable_chunked_prefill": false | |
| } | |
| }, | |
| "research": { | |
| "description": "Research configuration for experimentation", | |
| "settings": { | |
| "tensor_parallel_size": 2, | |
| "gpu_memory_utilization": 0.90, | |
| "max_batch_size": 4, | |
| "enable_prefix_caching": false, | |
| "enable_chunked_prefill": false, | |
| "enable_logging": true | |
| } | |
| } | |
| }, | |
| "comparison_matrix": { | |
| "base_vs_instruct": { | |
| "base_advantages": [ | |
| "More flexible for fine-tuning", | |
| "Better for creative tasks", | |
| "Less constrained outputs" | |
| ], | |
| "instruct_advantages": [ | |
| "Better instruction following", | |
| "More structured outputs", | |
| "Improved task completion" | |
| ] | |
| }, | |
| "base_vs_chat": { | |
| "base_advantages": [ | |
| "Better for single-turn tasks", | |
| "More diverse outputs", | |
| "Flexible formatting" | |
| ], | |
| "chat_advantages": [ | |
| "Better conversation coherence", | |
| "Improved context awareness", | |
| "Natural dialogue flow" | |
| ] | |
| } | |
| }, | |
| "migration_guide": { | |
| "from_base_to_instruct": { | |
| "steps": [ | |
| "Update prompt format to instruction style", | |
| "Adjust temperature (typically lower)", | |
| "Add explicit task descriptions", | |
| "Use structured output formats" | |
| ], | |
| "example_prompt_change": { | |
| "base": "Write a function to sort a list", | |
| "instruct": "### Instruction:\nWrite a Python function that sorts a list in ascending order.\n\n### Response:" | |
| } | |
| }, | |
| "from_base_to_chat": { | |
| "steps": [ | |
| "Convert to chat message format", | |
| "Add system prompts", | |
| "Maintain conversation history", | |
| "Use appropriate message roles" | |
| ], | |
| "example_format_change": { | |
| "base": "Hello, how are you?", | |
| "chat": [ | |
| { | |
| "role": "system", | |
| "content": "You are a helpful assistant." | |
| }, | |
| { | |
| "role": "user", | |
| "content": "Hello, how are you?" | |
| } | |
| ] | |
| } | |
| } | |
| }, | |
| "version_history": { | |
| "2.5.0-rnd": { | |
| "release_date": "2025-01-30", | |
| "status": "current", | |
| "changes": [ | |
| "Initial research release", | |
| "70B parameter model", | |
| "131K context with YARN", | |
| "SafeTensors format (83 shards)", | |
| "Full precision (FP16)" | |
| ] | |
| } | |
| }, | |
| "roadmap": { | |
| "upcoming_variants": [ | |
| { | |
| "name": "Helion-2.5-Rnd-Instruct", | |
| "expected": "Q2 2025", | |
| "status": "in_development" | |
| }, | |
| { | |
| "name": "Helion-2.5-Rnd-Chat", | |
| "expected": "Q2 2025", | |
| "status": "planned" | |
| }, | |
| { | |
| "name": "Helion-2.5-Rnd-Code", | |
| "expected": "Q3 2025", | |
| "status": "planned" | |
| } | |
| ], | |
| "future_features": [ | |
| "Multi-modal capabilities", | |
| "Extended context to 256K", | |
| "Improved multilingual support", | |
| "Domain-specific variants" | |
| ] | |
| } | |
| } |