import os
import gradio as gr
import requests
import pandas as pd
from langchain.agents import AgentExecutor, create_react_agent
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.tools import Tool
from langchain_experimental.tools import PythonREPLTool  # PythonREPLTool ships in langchain-experimental, not langchain-community
import tempfile
import base64
from langchain_core.messages import HumanMessage
# For PDF and Excel handling - these imports will be used in process_file
try:
    from langchain_community.document_loaders import PyPDFLoader
    import openpyxl  # For Excel
except ImportError:
    pass  # Assume installed during HF build
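# Assumed Space dependencies (requirements.txt), not pinned here: gradio, requests, pandas, openpyxl,
# pypdf, duckduckgo-search, langchain, langchain-openai, langchain-community, langchain-experimental.
# Adjust to whatever the Space actually installs.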
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Advanced Agent Definition ---
# ----- THIS IS WHERE THE ADVANCED LOGIC IS BUILT FOR HIGHER SCORES -----
class BasicAgent:
    def __init__(self, api_url):
        print("Advanced BasicAgent initialized with tool support.")
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY must be set in Hugging Face Space variables for the agent to work.")
        # Use a strong model like gpt-4o for better reasoning and vision
        self.llm = ChatOpenAI(temperature=0, model="gpt-4o", api_key=openai_api_key)
        # Tools for web search, code execution, and file processing
        self.search_tool = DuckDuckGoSearchRun(name="web_search", description="Search the web for information.")
        self.python_tool = PythonREPLTool(description="Execute Python code for calculations or data processing. Input should be valid Python code.")
        # Custom tool for processing files (downloads from API, handles images/PDFs/Excel/text)
        self.file_tool = Tool(
            name="process_file",
            func=self._process_file,
            description="Download and process a file associated with a task. Input format: 'task_id: <id>, file_name: <name>'"
        )
        self.tools = [self.search_tool, self.python_tool, self.file_tool]
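        # Example of the string the agent is expected to hand to process_file
        # (placeholder values, not a real task ID): "task_id: abc-123, file_name: report.pdf"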
        # ReAct prompt template (inspired by GAIA prompting for exact answers).
        # create_react_agent requires {tools}, {tool_names}, and {agent_scratchpad} in the prompt.
        self.prompt_template = PromptTemplate.from_template("""
You are an expert AI agent solving GAIA benchmark questions. These questions require reasoning, tool use, and sometimes file processing.
You have access to the following tools:
{tools}
If the question mentions a file or attachment, use the 'process_file' tool with 'task_id: <task_id>, file_name: <file_name>'.
Reason step-by-step: write a Thought, then an Action (one of [{tool_names}]) with an Action Input, then read the Observation; repeat as needed.
Finish with 'Final Answer:' followed by ONLY the final answer in the exact format required by the question. No explanations, no extra text.
Question: {question}
Thought:{agent_scratchpad}
""")
        self.agent = create_react_agent(self.llm, self.tools, self.prompt_template)
        self.executor = AgentExecutor(agent=self.agent, tools=self.tools, verbose=True, handle_parsing_errors=True, max_iterations=10)
        self.api_url = api_url
    def _process_file(self, input_str: str) -> str:
        """Internal function to download and process files."""
        try:
            # Parse input
            parts = dict(part.strip().split(': ', 1) for part in input_str.split(', '))
            task_id = parts.get('task_id')
            file_name = parts.get('file_name')
            if not task_id or not file_name:
                return "Invalid input for process_file. Need 'task_id' and 'file_name'."
            # Download file
            file_url = f"{self.api_url}/files/{task_id}"
            response = requests.get(file_url, timeout=10)
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1]) as tmp:
                tmp.write(response.content)
                file_path = tmp.name
            ext = os.path.splitext(file_name)[1].lower()
            if ext in ['.jpg', '.png', '.jpeg', '.gif']:
                # Use vision to describe the image; pick the MIME type from the extension instead of assuming JPEG
                mime_type = {'.png': 'image/png', '.gif': 'image/gif'}.get(ext, 'image/jpeg')
                with open(file_path, "rb") as img_file:
                    base64_image = base64.b64encode(img_file.read()).decode('utf-8')
                message = HumanMessage(content=[
                    {"type": "text", "text": "Describe this image in detail, focusing on elements relevant to the question."},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}}
                ])
                description = self.llm.invoke([message]).content
                os.unlink(file_path)
                return description
            elif ext == '.pdf':
                loader = PyPDFLoader(file_path)
                docs = loader.load()
                text = "\n\n".join(doc.page_content for doc in docs)
                os.unlink(file_path)
                return text[:20000]  # Truncate if too long
            elif ext in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
                os.unlink(file_path)
                return df.to_string()
            else:
                # Text file
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    text = f.read()
                os.unlink(file_path)
                return text[:20000]
        except Exception as e:
            return f"Error processing file: {str(e)}"
    def __call__(self, question: str, task_id: str, file_name: str | None = None) -> str:
        print(f"Agent processing question (first 50 chars): {question[:50]}... (task_id: {task_id}, file: {file_name})")
        input_prompt = question
        if file_name:
            input_prompt += f"\nThere is an attached file '{file_name}'. Use the 'process_file' tool with 'task_id: {task_id}, file_name: {file_name}' to access it."
        try:
            response = self.executor.invoke({"question": input_prompt})
            answer = response['output'].strip()
            print(f"Agent returning answer: {answer}")
            return answer
        except Exception as e:
            print(f"Error generating answer: {e}")
            return "Agent error occurred."
# Update the run_and_submit_all to pass task_id and file_name to the agent
def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        return "Please Login to Hugging Face with the button.", None
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    try:
        agent = BasicAgent(api_url)
    except Exception as e:
        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        file_name = item.get("file_name")  # Assuming the API provides 'file_name'; if not, check item for attachments
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = agent(question_text, task_id, file_name)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Agent Evaluation Runner for GAIA (Aiming for 60%+)")
    gr.Markdown(
        """
        **Instructions:**
        1. Set OPENAI_API_KEY in Hugging Face Space variables (Settings > Variables).
        2. Log in to Hugging Face.
        3. Click 'Run Evaluation & Submit All Answers'.
        This agent uses GPT-4o with tools for search, code execution, and file processing (images/PDFs/Excel) to achieve higher scores.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)