import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Choose a chat-specific model
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # a compact chat-tuned model

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",  # uses the GPU if available, falls back to CPU
)
def respond(message, history):
    # Format the conversation history into a prompt.
    # With type="messages", history is a list of {"role", "content"} dicts.
    prompt = "You are a friendly chatbot.\n\n"
    for msg in history:
        role = "User" if msg["role"] == "user" else "Assistant"
        prompt += f"{role}: {msg['content']}\n"
    prompt += f"User: {message}\nAssistant: "

    # Generate a response
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
    )

    # Decode and return only the newly generated tokens (the response)
    generated_text = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    )
    return generated_text
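
# TinyLlama-1.1B-Chat-v1.0 also ships with a built-in chat template, so the
# prompt could be built with tokenizer.apply_chat_template instead of the
# manual "User:/Assistant:" format above. A minimal sketch of that variant
# (the system message and function name here are illustrative, not part of
# the original app):
#
# def respond_with_template(message, history):
#     messages = [{"role": "system", "content": "You are a friendly chatbot."}]
#     messages += [{"role": m["role"], "content": m["content"]} for m in history]
#     messages.append({"role": "user", "content": message})
#     prompt = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#     output = model.generate(
#         inputs["input_ids"],
#         attention_mask=inputs["attention_mask"],
#         max_new_tokens=100,
#         temperature=0.7,
#         do_sample=True,
#         pad_token_id=tokenizer.eos_token_id,
#     )
#     return tokenizer.decode(
#         output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
#     )
#
# Using the model's own template usually matches its fine-tuning format more
# closely than a hand-rolled prompt, which tends to improve response quality.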
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch(debug=True)