Spaces:
Sleeping
Sleeping
File size: 1,680 Bytes
8dd0ad9 f7215c2 8dd0ad9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from huggingface_hub import login

# Access the Hugging Face token from the environment variable (read once and
# reused below instead of querying the environment twice).
HF_TOKEN = os.getenv("HF_Token")

# Log in only when a token is actually configured. Calling login(token=None)
# falls back to an interactive prompt, which cannot be answered when the app
# runs headless.
if HF_TOKEN:
    login(token=HF_TOKEN)
else:
    print("HF_Token is not set; skipping Hugging Face login (gated models may fail to load).")

# Load the LLaMA 3.2 1B Instruct model and tokenizer
model_name = "meta-llama/Llama-3.2-1B-Instruct"  # Hugging Face Hub model id
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): float16 is requested unconditionally; confirm this is the
# intended dtype when device_map="auto" places the model on CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
# Gradio app

# Upper bound on the number of tokens generated for a single reply. This is a
# budget for NEW tokens only, so it does not shrink as the conversation grows.
MAX_NEW_TOKENS = 200


def _generate_reply(messages):
    """Return the model's reply to a conversation.

    Args:
        messages: List of chat messages, each a dict with at least the keys
            ``"role"`` and ``"content"``. Content is assumed to be plain
            text -- TODO confirm if non-text messages are ever added.

    Returns:
        The newly generated assistant text, without the prompt and without
        special tokens.
    """
    # Pass only the fields the chat template needs; any extra keys the UI
    # attaches to history entries are dropped.
    prompt = [{"role": m["role"], "content": m["content"]} for m in messages]

    # Format the conversation with the model's own chat template and append
    # the assistant header so the model starts a new assistant turn.
    inputs = tokenizer.apply_chat_template(
        prompt,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # **inputs forwards both input_ids and attention_mask to generate().
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() returns prompt + continuation; decode only the continuation
    # so the reply does not echo the whole conversation back.
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()


def respond(message, chat_history):
    """Handle a submitted user message and produce the updated chat state.

    Args:
        message: Text submitted from the message textbox.
        chat_history: Current chatbot history as a list of
            ``{"role": ..., "content": ...}`` dicts (may be empty or None).

    Returns:
        A ``(textbox_value, chat_history)`` tuple: an empty string to clear
        the textbox, and the history extended with the user message and the
        assistant reply.
    """
    history = list(chat_history or [])

    # Ignore blank submissions instead of sending an empty turn to the model.
    if not message or not message.strip():
        return "", history

    # Add user message to chat history
    history.append({"role": "user", "content": message})

    # Generate and add bot response to chat history
    bot_message = _generate_reply(history)
    history.append({"role": "assistant", "content": bot_message})
    return "", history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    clear = gr.ClearButton([msg, chatbot])

    # Submitting the textbox runs respond(), which clears the textbox and
    # refreshes the chatbot with the updated history.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()
|