# AiAgent / model / yehia_interface.py
# (Hugging Face Hub page residue preserved as comments so the module parses:
#  "tarek29910's picture" / "Upload yehia_interface.py" / commit e67f862 verified)
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
# === Model setup ===
# Loads the Yehia-7B Arabic chat model once at import time; `tokenizer` and
# `model` are module-level singletons used by query_yehia() below.
model_name = "Navid-AI/Yehia-7B-preview"

# Hub access token, only needed if the model repo is private.
hf_token = os.getenv("HUGGINGFACE_TOKEN", None)

# Load tokenizer and model.
# NOTE: the `use_auth_token` kwarg is deprecated in recent transformers
# releases; `token` is the supported replacement and behaves identically.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # bf16: half the memory of fp32, same exponent range
    device_map="auto",           # shard/place layers across available devices automatically
    token=hf_token,
)
# === Inference function ===
def query_yehia(user_message: str, history=None):
    """Ask the Yehia model for a reply to *user_message*.

    Parameters
    ----------
    user_message : str
        The latest user turn.
    history : list | None
        Optional prior chat turns as ``{"role": ..., "content": ...}``
        dicts; ``None`` starts a fresh conversation.

    Returns
    -------
    str
        The decoded, whitespace-trimmed model reply.
    """
    past_turns = [] if history is None else history
    conversation = [
        {"role": "system", "content": "ุฃู†ุช ูŠุญูŠู‰ุŒ ูˆูƒูŠู„ ู…ุจูŠุนุงุช ุนู‚ุงุฑูŠ ุฐูƒูŠ ู…ุชุฎุตุต ููŠ ุชู‚ุฏูŠู… ุงู„ู…ุดูˆุฑุฉ ู„ู„ุนู…ู„ุงุก ุจุงู„ู„ุบุฉ ุงู„ุนุฑุจูŠุฉ."},
        *past_turns,
        {"role": "user", "content": user_message},
    ]

    # Render the chat template into prompt token ids on the model's device.
    prompt_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Sample a completion; pad_token_id silences the "no pad token" warning.
    generated = model.generate(
        prompt_ids,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Keep only the tokens produced after the prompt, then decode.
    new_tokens = generated[0][prompt_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()