# app.py
import gradio as gr
from transformers import pipeline

# Build the text-generation pipeline once at import time so every chat
# request reuses the same loaded model (a small LLaMA-style / SmolLM model).
_MODEL_ID = "HuggingFaceTB/SmolLM2-135M-Instruct"

# device_map="auto" lets the library choose the device placement;
# alternative: "cpu" when no GPU is available.
chat_model = pipeline(task="text-generation", model=_MODEL_ID, device_map="auto")

def chat_fn(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The user's new message (str).
        history: Earlier turns as a list of ``{"role": ..., "content": ...}``
            dicts, or ``None``/empty for a fresh conversation. The list
            passed in is not modified.

    Returns:
        A ``(reply, updated_history)`` tuple: the generated reply text and a
        new list containing the earlier turns followed by the new user and
        assistant messages.

    Note:
        ``gr.ChatInterface`` keeps the conversation itself and, unless
        ``additional_outputs`` is configured, expects its callback to return
        only the reply. When wiring this function into a ChatInterface,
        forward just the first element of the returned tuple.
    """
    # Shallow-copy so the caller's list is left untouched; UI frameworks that
    # own the history would otherwise end up with duplicated turns.
    updated_history = list(history or [])
    updated_history.append({"role": "user", "content": message})

    # Hand the turns over as chat messages so the pipeline applies the
    # model's own chat template instead of an ad-hoc "role: content" prompt.
    # Only role/content are forwarded, and content is coerced to str because
    # the template works on plain text.
    model_messages = [
        {"role": turn["role"], "content": str(turn["content"])}
        for turn in updated_history
    ]
    output = chat_model(
        model_messages,
        max_new_tokens=100,
        do_sample=True,
        # Return only the newly generated text; splitting the full output on
        # the prompt string is fragile when the echoed prompt is not
        # byte-identical to the input.
        return_full_text=False,
    )
    reply = output[0]["generated_text"].strip()

    updated_history.append({"role": "assistant", "content": reply})
    return reply, updated_history

def _respond(message, history):
    """Adapt ``chat_fn`` to the callback contract of ``gr.ChatInterface``.

    ChatInterface tracks the conversation itself and, because no
    ``additional_outputs`` are configured below, expects the callback to
    return only the reply. ``chat_fn`` returns a ``(reply, history)`` tuple,
    so only the reply is forwarded. A shallow copy of the history is passed
    so the list owned by the UI is never appended to by the callback.
    """
    reply, _ = chat_fn(message, list(history or []))
    return reply


# Chat UI: type="messages" means history is exchanged as a list of
# {"role": ..., "content": ...} dicts.
iface = gr.ChatInterface(
    fn=_respond,
    type="messages",
    title="Gradio + transformers Chat",
    examples=["Привет!", "Расскажи анекдот", "Что такое LLaMA?"],
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()