import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load the tokenizer and model once at startup. float16 halves the memory
# footprint and device_map="auto" places the weights on a GPU when one is
# available (the first run downloads the checkpoint from the Hugging Face Hub).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)


def respond(message, history):
    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts, not (user, bot) tuples,
    # so the transcript is rebuilt from role/content pairs.
    prompt = "You are a friendly chatbot.\n\n"
    for h in history:
        speaker = "User" if h["role"] == "user" else "Assistant"
        prompt += f"{speaker}: {h['content']}\n"
    prompt += f"User: {message}\nAssistant: "

    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,  # passes input_ids and attention_mask together
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,
    )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    generated_text = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    return generated_text


chatbot = gr.ChatInterface(respond, type="messages")
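# debug=True blocks here and surfaces tracebacks in the console; share=True
# could also be passed to expose a temporary public URL.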
chatbot.launch(debug=True)