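# Gradio Space that runs a tool-using GPT-4o agent on GAIA benchmark questions and
# submits the answers to the Agents Course Unit 4 scoring API.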
import os
import base64
import mimetypes
import tempfile

import gradio as gr
import requests
import pandas as pd
from langchain.agents import AgentExecutor, create_react_agent
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import HumanMessage
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.tools import Tool
from langchain_experimental.tools import PythonREPLTool  # the Python REPL tool lives in langchain-experimental

# Optional loaders for PDF and Excel attachments; used in _process_file
try:
    from langchain_community.document_loaders import PyPDFLoader
    import openpyxl  # Excel engine for pandas.read_excel
except ImportError:
    pass  # Assume these are installed via requirements.txt during the HF Space build

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Advanced Agent Definition ---
# ----- THIS IS WHERE THE ADVANCED LOGIC IS BUILT FOR HIGHER SCORES -----
class BasicAgent:
    def __init__(self, api_url):
        print("Advanced BasicAgent initialized with tool support.")
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY must be set in Hugging Face Space variables for the agent to work.")
        
        # Use a strong model like gpt-4o for better reasoning and vision
        self.llm = ChatOpenAI(temperature=0, model="gpt-4o", api_key=openai_api_key)
        
        # Tools for web search, code execution, and file processing
        self.search_tool = DuckDuckGoSearchRun(name="web_search", description="Search the web for information.")
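        # Note: the Python REPL tool executes model-generated code in this process; acceptable in a sandboxed Space, risky elsewhere.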
        self.python_tool = PythonREPLTool(description="Execute Python code for calculations or data processing. Input should be valid Python code.")
        
        # Custom tool for processing files (downloads from API, handles images/PDFs/Excel/text)
        self.file_tool = Tool(
            name="process_file",
            func=self._process_file,
            description="Download and process a file associated with a task. Input format: 'task_id: <id>, file_name: <name>'"
        )
        
        self.tools = [self.search_tool, self.python_tool, self.file_tool]
        
        # ReAct prompt template (inspired by GAIA prompting); create_react_agent requires {tools}, {tool_names}, and {agent_scratchpad}.
        self.prompt_template = PromptTemplate.from_template("""
You are an expert AI agent solving GAIA benchmark questions. They require reasoning, tool use, and sometimes file processing. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer in the exact format required by the question, with no explanations and no extra text

If the question mentions a file or attachment, use the 'process_file' tool with 'task_id: <task_id>, file_name: <file_name>'.

Begin!

Question: {question}
Thought:{agent_scratchpad}
""")
        
        self.agent = create_react_agent(self.llm, self.tools, self.prompt_template)
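        # handle_parsing_errors lets the executor recover from malformed ReAct output;
        # max_iterations caps the tool-use loop so a confused run cannot spin forever.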
        self.executor = AgentExecutor(agent=self.agent, tools=self.tools, verbose=True, handle_parsing_errors=True, max_iterations=10)
        
        self.api_url = api_url

    def _process_file(self, input_str: str) -> str:
        """Internal function to download and process files."""
        try:
            # Parse input
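            # Expected shape (illustrative values): "task_id: <some-id>, file_name: data.xlsx"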
            parts = dict(part.strip().split(': ', 1) for part in input_str.split(', '))
            task_id = parts.get('task_id')
            file_name = parts.get('file_name')
            if not task_id or not file_name:
                return "Invalid input for process_file. Need 'task_id' and 'file_name'."
            
            # Download file
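            # The scoring API serves attachments at {api_url}/files/{task_id}; file_name is only
            # used locally to choose a temp-file suffix and pick a handler below.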
            file_url = f"{self.api_url}/files/{task_id}"
            response = requests.get(file_url, timeout=10)
            response.raise_for_status()
            
            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1]) as tmp:
                tmp.write(response.content)
                file_path = tmp.name
            
            ext = os.path.splitext(file_name)[1].lower()
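            # Dispatch on extension: images go to the vision model, PDFs and spreadsheets are
            # converted to text, and anything else is read as plain text (truncated if very long).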
            
            if ext in ['.jpg', '.png', '.jpeg', '.gif']:
                # Use the vision model to describe the image; guess the MIME type from the file name
                mime_type = mimetypes.guess_type(file_name)[0] or "image/jpeg"
                with open(file_path, "rb") as img_file:
                    base64_image = base64.b64encode(img_file.read()).decode('utf-8')
                message = HumanMessage(content=[
                    {"type": "text", "text": "Describe this image in detail, focusing on elements relevant to the question."},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}}
                ])
                description = self.llm.invoke([message]).content
                os.unlink(file_path)
                return description
            
            elif ext == '.pdf':
                loader = PyPDFLoader(file_path)
                docs = loader.load()
                text = "\n\n".join(doc.page_content for doc in docs)
                os.unlink(file_path)
                return text[:20000]  # Truncate if too long
            
            elif ext in ['.xlsx', '.xls']:
                # pandas is imported at module level; openpyxl provides the engine for .xlsx files
                df = pd.read_excel(file_path)
                os.unlink(file_path)
                return df.to_string()
            
            else:
                # Text file
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    text = f.read()
                os.unlink(file_path)
                return text[:20000]
        
        except Exception as e:
            return f"Error processing file: {str(e)}"

    def __call__(self, question: str, task_id: str, file_name: str | None = None) -> str:
        print(f"Agent processing question (first 50 chars): {question[:50]}... (task_id: {task_id}, file: {file_name})")
        input_prompt = question
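        # If the task references an attachment, tell the agent explicitly how to call process_file.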
        if file_name:
            input_prompt += f"\nThere is an attached file '{file_name}'. Use the 'process_file' tool with 'task_id: {task_id}, file_name: {file_name}' to access it."
        
        try:
            response = self.executor.invoke({"question": input_prompt})
            answer = response['output'].strip()
            print(f"Agent returning answer: {answer}")
            return answer
        except Exception as e:
            print(f"Error generating answer: {e}")
            return "Agent error occurred."

# run_and_submit_all fetches all questions, runs the agent on each (passing task_id and file_name through), and submits the answers.
def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        return "Please Login to Hugging Face with the button.", None
    
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    
    try:
        agent = BasicAgent(api_url)
    except Exception as e:
        return f"Error initializing agent: {e}", None
    
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)
    
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None
    
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        file_name = item.get("file_name")  # Assuming the API provides 'file_name'; if not, check item for attachments
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = agent(question_text, task_id, file_name)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
    
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    
    # Build the submission payload: username, agent_code (link to this Space's code), and all answers.
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Agent Evaluation Runner for GAIA (Aiming for 60%+)")
    gr.Markdown(
        """
        **Instructions:**
        1. Set OPENAI_API_KEY in Hugging Face Space variables (Settings > Variables).
        2. Log in to Hugging Face.
        3. Click 'Run Evaluation & Submit All Answers'.
        
        This agent uses GPT-4o with tools for search, code execution, and file processing (images/PDFs/Excel) to achieve higher scores.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
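    # Gradio injects the gr.OAuthProfile argument automatically when a LoginButton is present,
    # so no explicit `inputs` are needed for run_and_submit_all.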
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)