File size: 4,864 Bytes
e252ecc
7c4d1b9
80328ae
1a6274b
80328ae
1a6274b
80328ae
1a6274b
80328ae
 
 
 
1a6274b
80328ae
1a6274b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80328ae
1a6274b
80328ae
1a6274b
80328ae
 
1a6274b
 
 
 
 
 
 
 
 
 
 
 
4b4ec32
1a6274b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b4ec32
1a6274b
 
 
 
 
 
80328ae
 
1a6274b
 
 
 
 
 
4b4ec32
1a6274b
80328ae
1a6274b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38c5157
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os

# SECURITY: an API key used to be hard-coded on this line. Secrets must never
# be committed to source control, so the key is read from the process
# environment instead (e.g. a Space secret). The previously committed key
# must be treated as leaked and revoked.
# NOTE(review): the old key's prefix suggests a Groq key, hence the variable
# name GROQ_API_KEY -- confirm against the deployment configuration. The value
# is None when the variable is not set.
api_key = os.getenv("GROQ_API_KEY")

import os
import gradio as gr
import requests
from huggingface_hub import InferenceClient, login
from dotenv import load_dotenv
import pandas as pd

# Load environment variables
# Runs at import time, before any os.getenv() lookup further down the module.
load_dotenv()

# Constants
# Base URL of the scoring service; "/questions" and "/submit" are appended to
# it when fetching questions and submitting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model identifier handed to InferenceClient and echoed in the UI/status text.
# NOTE(review): confirm this identifier is resolvable by the inference backend
# in use.
MODEL_NAME = "meta-llama/llama-4-maverick-17b-128e-instruct"

# Initialize the Llama Maverick client
class MaverickAgent:
    """Question-answering agent backed by a Hugging Face ``InferenceClient``.

    Instances are callable: ``agent(question)`` returns the generated answer
    as a string. Generation failures are not raised; they are reported as a
    string of the form ``"Error: <message>"``.
    """

    # Prompt skeleton, assembled from explicit lines instead of an indented
    # triple-quoted literal so that source-code indentation never leaks into
    # the text sent to the model.
    # NOTE(review): these header tokens follow the Llama 3 chat layout --
    # confirm they match the chat template of the model named by MODEL_NAME.
    _PROMPT_TEMPLATE = "\n".join(
        (
            "<|begin_of_text|>",
            "<|start_header_id|>system<|end_header_id|>",
            "You are an AI assistant that provides accurate and concise answers to questions.",
            "Be factual and respond with just the answer unless asked to elaborate.",
            "<|eot_id|>",
            "<|start_header_id|>user<|end_header_id|>",
            "{question}",
            "<|eot_id|>",
            "<|start_header_id|>assistant<|end_header_id|>",
        )
    )

    def __init__(self):
        """Create the inference client for ``MODEL_NAME``.

        The access token is read from the ``HUGGINGFACE_TOKEN`` environment
        variable (``None`` when unset).

        Raises:
            Exception: any error raised while constructing the client is
                logged and re-raised unchanged.
        """
        try:
            self.client = InferenceClient(
                model=MODEL_NAME,
                token=os.getenv("HUGGINGFACE_TOKEN")
            )
            print("MaverickAgent initialized successfully")
        except Exception as e:
            print(f"Error initializing MaverickAgent: {e}")
            raise

    def _build_prompt(self, question: str) -> str:
        """Return the full prompt text for *question*, free of indentation."""
        # str.format only interprets braces in the template, so braces inside
        # the question text are passed through verbatim.
        return self._PROMPT_TEMPLATE.format(question=question)

    def __call__(self, question: str) -> str:
        """Generate an answer for *question*.

        Args:
            question: The question text to send to the model.

        Returns:
            The generated text up to the first ``<|eot_id|>`` marker, stripped
            of surrounding whitespace, or ``"Error: <message>"`` if anything
            in the request or post-processing raised.
        """
        try:
            print(f"Processing question: {question[:100]}...")

            response = self.client.text_generation(
                self._build_prompt(question),
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
            )

            # Keep only the text before the end-of-turn marker, in case the
            # backend echoes it or continues generating past it.
            answer = response.split("<|eot_id|>")[0].strip()
            print(f"Generated answer: {answer[:200]}...")
            return answer
        except Exception as e:
            # Deliberate best-effort: callers receive an error string rather
            # than an exception.
            print(f"Error processing question: {e}")
            return f"Error: {str(e)}"

# Authentication
# Log in only when a token is actually configured. Calling login() without a
# token falls back to an interactive prompt, which cannot be answered when the
# app runs headless.
_hf_token = os.getenv("HUGGINGFACE_TOKEN")
if _hf_token:
    try:
        login(token=_hf_token)
    except Exception as e:
        # Best-effort: a failed login is reported but must not stop the app
        # from starting.
        print(f"Authentication error: {e}")
else:
    print("HUGGINGFACE_TOKEN is not set; skipping Hugging Face login")

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stopped before any question was
        processed (no login, agent failure, question fetch failure).
    """
    if not profile:
        return "Please log in with Hugging Face first.", None

    def _preview(text, limit=100):
        """Return *text* shortened to *limit* characters plus an ellipsis."""
        return text[:limit] + "..." if len(text) > limit else text

    # Initialize agent
    try:
        agent = MaverickAgent()
    except Exception as e:
        return f"Agent initialization failed: {e}", None

    # Fetch questions
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        # Surface HTTP errors here instead of trying to parse an error page.
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    if not questions_data:
        return "No questions available.", None
    if not isinstance(questions_data, list):
        return (
            "Failed to fetch questions: unexpected payload type "
            f"{type(questions_data).__name__}",
            None,
        )

    # Process questions
    results = []
    answers = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue

        try:
            # MaverickAgent.__call__ reports generation failures as an
            # "Error: ..." string rather than raising, so such strings are
            # recorded and submitted like any other answer.
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            results.append({
                "Task ID": task_id,
                "Question": _preview(question),
                "Answer": _preview(answer),
            })
        except Exception as e:
            results.append({
                "Task ID": task_id,
                "Question": _preview(question),
                "Answer": f"Error: {str(e)}",
            })

    results_table = pd.DataFrame(results)

    # Nothing to grade: do not post an empty submission.
    if not answers:
        return "No answers were generated; nothing to submit.", results_table

    # Submit answers
    try:
        submission = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
            "answers": answers
        }
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        # Without this check a rejected submission would be reported as a
        # success with score "N/A".
        response.raise_for_status()
        result = response.json()
    except Exception as e:
        return f"Submission failed: {e}", results_table

    return (
        f"✅ Submitted {len(answers)} answers\n"
        f"📊 Score: {result.get('score', 'N/A')}%\n"
        f"🔢 Correct: {result.get('correct_count', 0)}/{len(answers)}\n"
        f"🤖 Model: {MODEL_NAME}",
        results_table
    )

# Gradio Interface
# Builds the page top to bottom: title, model note, login button, run button,
# then a row holding the status box and the results table.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🦙 Llama 4 Maverick Agent")
    gr.Markdown(f"Using `{MODEL_NAME}` for evaluation")
    
    # run_and_submit_all refuses to run without a logged-in profile.
    gr.LoginButton()
    
    with gr.Row():
        run_btn = gr.Button("Run Evaluation", variant="primary")
        
    with gr.Row():
        # Read-only text box for the status message returned by the handler.
        status = gr.Textbox(label="Status", interactive=False)
        # Table for the per-question results returned by the handler.
        results = gr.DataFrame(label="Results", wrap=True)

    # The handler's (status, table) return pair maps onto these two outputs.
    # NOTE(review): no `inputs` are passed; presumably Gradio supplies the
    # OAuth profile based on the handler's type annotation -- confirm.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, results]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()