import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Specify the model ID
model_id = "MBZUAI-Paris/Atlas-Chat-2B"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # Automatically selects the device
    torch_dtype=torch.bfloat16  # Use bfloat16 for efficiency
)

# Define the text generation function
def generate_text(prompt, max_length=100, temperature=0.7):
    # Prepare the input message in chat format
    messages = [{"role": "user", "content": prompt}]

    # Tokenize the input with the chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,
        add_generation_prompt=True
    ).to(model.device)

    # Generate the response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        temperature=temperature,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        num_return_sequences=1
    )

    # Decode only the newly generated tokens, skipping the prompt echo
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

# Create the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=4, label="Enter Prompt"),
        gr.Slider(minimum=50, maximum=300, step=10, value=100, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=1.5, step=0.1, value=0.7, label="Temperature")
    ],
    outputs="text",
    title="Atlas-Chat-2B Text Generator",
    description="Powered by the MBZUAI-Paris/Atlas-Chat-2B model."
)

# Launch the interface
interface.launch()
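
# A minimal smoke test, kept as comments since launch() above blocks
# (this is a sketch added here, not part of the original script; the
# prompt string is an arbitrary example):
#
#     print(generate_text("Salam! Kif dayr?", max_length=64, temperature=0.7))
#
# To expose the app via a temporary public URL, Gradio's share flag can
# be used instead of the plain launch() call:
#
#     interface.launch(share=True)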