# Hugging Face Space: Yehia-7B Arabic chat assistant (demo script)
# Module-level setup: load the Yehia-7B chat model and its tokenizer once at startup.
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# === Model setup ===
model_name = "Navid-AI/Yehia-7B-preview"

# Auth token for gated/private models; None falls back to anonymous access.
hf_token = os.getenv("HUGGINGFACE_TOKEN", None)

# Load tokenizer and model.
# NOTE: `use_auth_token` is deprecated/removed in recent transformers releases;
# `token` is the supported replacement.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # halves memory vs fp32; needs bf16-capable hardware
    device_map="auto",           # spread layers across available devices (requires accelerate)
    token=hf_token,
)
# === Inference function === | |
def query_yehia(user_message: str, history=None) -> str:
    """Generate a reply from Yehia-7B for one user turn.

    Parameters
    ----------
    user_message : str
        The latest user message.
    history : list[dict] | None
        Prior chat turns as ``{"role": ..., "content": ...}`` dicts;
        defaults to an empty history (avoids a mutable default argument).

    Returns
    -------
    str
        The model's generated reply, stripped of surrounding whitespace.
    """
    if history is None:
        history = []
    messages = [
        {"role": "system", "content": "ุฃูุช ูุญููุ ูููู ู ุจูุนุงุช ุนูุงุฑู ุฐูู ู ุชุฎุตุต ูู ุชูุฏูู ุงูู ุดูุฑุฉ ููุนู ูุงุก ุจุงููุบุฉ ุงูุนุฑุจูุฉ."},
        *history,
        {"role": "user", "content": user_message},
    ]
    # Apply the model's chat template. return_dict=True yields both input_ids
    # and attention_mask — without the mask, generate() warns and may treat
    # real tokens as padding. BatchEncoding.to() moves all tensors at once.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    # inference_mode: skip autograd bookkeeping during generation (saves memory).
    with torch.inference_mode():
        outputs = model.generate(
            **encoded,
            max_new_tokens=512,
            do_sample=True,        # stochastic sampling, shaped by temperature/top_p
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens (skip the echoed prompt).
    prompt_len = encoded["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response.strip()