from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI


MODEL_ID = "rasyosef/Phi-1_5-Instruct-v0.1"


tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
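# Note: this loads the model on CPU in full precision by default. A common
# option for GPU placement (assumes the `accelerate` package is installed):
# model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")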

app = FastAPI()

@app.get("/chat")
def chat(query: str):
    """
    GET /chat?query=Your+question
    Returns JSON: {"answer": "...model’s reply..."}
    """
    # Build the ChatML prompt expected by Phi-1.5 Instruct
    prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        "<|im_start|>user\n" + query + "<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
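    # Alternative sketch: if the tokenizer ships a chat template, the same
    # prompt can be built without hand-formatting (assumption: the bundled
    # template matches the ChatML layout above):
    # prompt = tokenizer.apply_chat_template(
    #     [{"role": "system", "content": "You are a helpful assistant."},
    #      {"role": "user", "content": query}],
    #     tokenize=False,
    #     add_generation_prompt=True,
    # )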
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=200)
    # Only decode newly generated tokens (skip the “prompt” tokens)
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[-1]:], 
        skip_special_tokens=True
    )
    return {"answer": response.strip()}
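

# Usage sketch (assumptions: this file is saved as app.py and uvicorn is
# installed; the port is arbitrary):
#   uvicorn app:app --host 0.0.0.0 --port 8000
#   curl "http://localhost:8000/chat?query=What+is+the+capital+of+France%3F"
# Expected shape of the reply: {"answer": "..."}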