# Spaces: Runtime error / Runtime error
# (page status text captured together with the file; not part of the program)
import os

import torch
from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# ASGI application object; route handlers in this module attach to it.
app = FastAPI()
class UserInput(BaseModel):
    """Request body for the chat handler."""

    # Text passed to the text-generation pipeline as the prompt.
    prompt: str
# Load the tokenizer and model once at import time and wrap them in a
# text-generation pipeline that the request handler reuses.
model_name = "NousResearch/Nous-Hermes-llama2-13b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# NOTE(review): a 13B-parameter checkpoint in float16 is on the order of
# 26 GB of weights; confirm the host has enough accelerator/CPU memory,
# as this may be related to the "Runtime error" status -- TODO confirm.
# NOTE(review): trust_remote_code=True permits executing Python code shipped
# in the model repository; confirm this checkpoint actually requires it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
# NOTE(review): the handler previously had no route decorator, so it was
# never registered with `app`. The "/chat" path is assumed from the function
# name -- confirm against any existing clients.
@app.post("/chat")
async def chat(data: UserInput):
    """Generate a sampled completion for the submitted prompt.

    Args:
        data: Request body carrying the ``prompt`` string.

    Returns:
        A dict with a single ``"response"`` key holding the
        ``generated_text`` field of the first pipeline result.
    """
    # NOTE(review): this call is synchronous and is not awaited, so the
    # event loop is presumably blocked for the duration of generation --
    # consider offloading it to a worker thread if concurrency matters.
    output = generator(
        data.prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.8,
        top_k=50,
        top_p=0.95,
    )
    return {"response": output[0]["generated_text"]}