Adekiitan11 committed on
Commit
e908cae
·
verified ·
1 Parent(s): b127576

Upload app/main.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app/main.py +27 -0
app/main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ from app.model import model, tokenizer
5
+ import torch
6
+
7
+ app = FastAPI()
8
+
9
class Prompt(BaseModel):
    """Request body for the ``/generate`` endpoint."""

    # Raw user text; the endpoint strips surrounding whitespace before use.
    text: str
11
+
12
@app.post("/generate")
def generate(prompt: Prompt):
    """Generate a sampled completion for the submitted prompt.

    The user text is stripped, wrapped in an ``[INST] ... [/INST]`` template,
    tokenized, and passed to ``model.generate`` with sampling enabled
    (temperature 0.7, top-p 0.9, at most 256 new tokens).

    Args:
        prompt: Request body carrying the user text in ``prompt.text``.

    Returns:
        A dict ``{"response": <generated text>}`` containing only the newly
        generated text with surrounding whitespace removed.
    """
    # NOTE(review): the template hard-codes "<s>"; if the tokenizer also
    # prepends its own BOS token the prompt starts with two of them —
    # confirm against the tokenizer configuration in app.model.
    full_prompt = f"<s>[INST] {prompt.text.strip()} [/INST]"

    # Put the inputs on the device the model actually lives on. Choosing
    # "cuda" merely because CUDA is available fails with a device mismatch
    # when the model was loaded on CPU (or on a different device).
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # Decode only the tokens produced after the prompt. Splitting the decoded
    # text on "[/INST]" is fragile: the marker disappears when the tokenizer
    # treats it as a special token (skip_special_tokens=True), and a reply
    # that itself contains "[/INST]" would be truncated.
    # Assumes the generate() output begins with the prompt tokens, which the
    # original "[/INST]" split also relied on.
    generated_ids = outputs[0][prompt_length:]
    result = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return {"response": result.strip()}