from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr

# Model switched from EleutherAI/polyglot-ko-1.3b to skt/kogpt2-base-v2.
model_name = "skt/kogpt2-base-v2"

# Use half precision only when a CUDA device is present. On a CPU-only host
# the weights stay in float32: float16 inference on CPU is slow and some ops
# are not implemented for half precision on older PyTorch builds.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto",
)

# Shared text-generation pipeline; the sampling settings below are the
# defaults for every call made through `pipe`.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    do_sample=True,
    temperature=0.5,
    top_p=0.9,
)

def chat_fn(prompt: str) -> str:
    """Generate text for ``prompt`` using the module-level ``pipe``.

    Args:
        prompt: Text entered by the user. ``None`` or a blank
            (empty / whitespace-only) string means "nothing to generate".

    Returns:
        The ``generated_text`` field of the first pipeline output; an empty
        string when the prompt is blank; or ``"Error: <message>"`` when
        generation raised an exception.
    """
    # Do not spend a model call on blank input; the UI can submit an empty
    # textbox, and there is nothing meaningful to continue from.
    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
        return ""

    try:
        outputs = pipe(prompt)
        # The text-generation pipeline returns a list of outputs, each of
        # which carries a "generated_text" key.
        return outputs[0]["generated_text"]
    except Exception as e:
        # UI boundary: log the failure and show it to the user instead of
        # letting the request crash.
        print(f"Error in chat_fn: {e}")
        return f"Error: {e}"


# Blocks UI: a prompt box, a response box, and a button that runs chat_fn.
with gr.Blocks() as demo:
    with gr.Row():
        input_box = gr.Textbox(label="Prompt", lines=2)
    with gr.Row():
        output_box = gr.Textbox(label="Response")

    btn = gr.Button("Generate")
    # Generation is triggered only by the button. A page-load listener would
    # run the model on an empty prompt for every visitor, so none is
    # registered here.
    btn.click(chat_fn, inputs=input_box, outputs=output_box)

    # Sample prompts offered for the prompt box.
    gr.Examples(
        examples=["안녕?", "한국에 대해 말해줘"],
        inputs=input_box,
    )

# Text-in / text-out Interface around chat_fn, used as the API-facing app.
# The endpoint name is set explicitly to "predict".
api_demo = gr.Interface(fn=chat_fn, inputs="text", outputs="text", api_name="predict")

if __name__ == "__main__":
    # `api_demo` is the app that is actually served, so queuing must be
    # enabled on it; calling queue() on an app that is never launched has
    # no effect on the running server.
    api_demo.queue()
    api_demo.launch(share=False)