api_key = "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"
import os
import gradio as gr
import requests
from huggingface_hub import InferenceClient, login
from dotenv import load_dotenv
import pandas as pd
# Load environment variables
load_dotenv()
# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MODEL_NAME = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
class MaverickAgent:
    """Thin wrapper around the Hugging Face InferenceClient for the Maverick model."""

    def __init__(self):
        try:
            self.client = InferenceClient(
                model=MODEL_NAME,
                token=os.getenv("HUGGINGFACE_TOKEN"),
            )
            print("MaverickAgent initialized successfully")
        except Exception as e:
            print(f"Error initializing MaverickAgent: {e}")
            raise
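
    # Note (assumption, not verified here): Llama models on the Hub are
    # typically gated, so the HUGGINGFACE_TOKEN used above must belong to an
    # account that has been granted access to MODEL_NAME.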
    def __call__(self, question: str) -> str:
        try:
            print(f"Processing question: {question[:100]}...")

            # Llama-style chat template: system instructions, then the user
            # question, then an assistant header to cue the model's reply.
            prompt = f"""<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
You are an AI assistant that provides accurate and concise answers to questions.
Be factual and respond with just the answer unless asked to elaborate.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{question}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>"""

            response = self.client.text_generation(
                prompt,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
            )

            # Keep only the text before the model's end-of-turn token.
            answer = response.split("<|eot_id|>")[0].strip()
            print(f"Generated answer: {answer[:200]}...")
            return answer
        except Exception as e:
            print(f"Error processing question: {e}")
            return f"Error: {str(e)}"
# Authenticate with the Hugging Face Hub at startup.
try:
    login(token=os.getenv("HUGGINGFACE_TOKEN"))
except Exception as e:
    print(f"Authentication error: {e}")
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent, and submit the results."""
    if not profile:
        return "Please log in with Hugging Face first.", None

    # Initialize agent
    try:
        agent = MaverickAgent()
    except Exception as e:
        return f"Agent initialization failed: {e}", None
    # Fetch questions
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "No questions available.", None
    except Exception as e:
        return f"Failed to fetch questions: {e}", None
    # Process questions
    results = []
    answers = []
    for item in questions_data:
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            results.append({
                "Task ID": task_id,
                "Question": question[:100] + "..." if len(question) > 100 else question,
                "Answer": answer[:100] + "..." if len(answer) > 100 else answer,
            })
        except Exception as e:
            results.append({
                "Task ID": task_id,
                "Question": question,
                "Answer": f"Error: {str(e)}",
            })
    # Submit answers
    try:
        submission = {
            "username": profile.username,
            "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
            "answers": answers,
        }
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        response.raise_for_status()
        result = response.json()
        return (
            f"βœ… Submitted {len(answers)} answers\n"
            f"πŸ“Š Score: {result.get('score', 'N/A')}%\n"
            f"πŸ”’ Correct: {result.get('correct_count', 0)}/{len(answers)}\n"
            f"πŸ€– Model: {MODEL_NAME}",
            pd.DataFrame(results)
        )
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results)
# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ¦™ Llama 4 Maverick Agent")
    gr.Markdown(f"Using `{MODEL_NAME}` for evaluation")
    gr.LoginButton()
    with gr.Row():
        run_btn = gr.Button("Run Evaluation", variant="primary")
    with gr.Row():
        status = gr.Textbox(label="Status", interactive=False)
        results = gr.DataFrame(label="Results", wrap=True)
    # Gradio injects the gr.OAuthProfile argument automatically for functions
    # annotated with that type, so no explicit inputs are listed here.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, results],
    )

if __name__ == "__main__":
    demo.launch()