File size: 9,506 Bytes
10e9b7d
 
eccf8e4
3c4371f
991d826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10e9b7d
e80aab9
3db6293
e80aab9
991d826
31243f4
991d826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4021bf3
991d826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c4371f
991d826
 
 
7e4a06b
991d826
3c4371f
7e4a06b
7d65c66
991d826
7e4a06b
31243f4
 
991d826
31243f4
991d826
31243f4
 
991d826
36ed51a
c1fd3d2
991d826
eccf8e4
31243f4
7d65c66
31243f4
 
7d65c66
991d826
 
7d65c66
 
31243f4
 
 
991d826
 
31243f4
 
991d826
7d65c66
 
31243f4
991d826
 
31243f4
 
991d826
7d65c66
e80aab9
7d65c66
e80aab9
 
31243f4
e80aab9
 
3c4371f
 
 
e80aab9
991d826
7d65c66
991d826
e80aab9
991d826
e80aab9
991d826
0ee0419
e514fd7
 
991d826
 
 
 
e514fd7
e80aab9
7e4a06b
31243f4
9088b99
7d65c66
991d826
e80aab9
 
3c4371f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import os
import gradio as gr
import requests
import pandas as pd
from langchain.agents import AgentExecutor, create_react_agent
from langchain_huggingface import HuggingFaceHub  # For free HF models
from langchain_core.prompts import PromptTemplate
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.tools import Tool
from langchain_community.tools import PythonREPLTool
import tempfile
import base64
from huggingface_hub import InferenceClient  # For vision and text

# For PDF and Excel handling
try:
    from langchain_community.document_loaders import PyPDFLoader
    import openpyxl
except ImportError:
    pass

# --- Constants ---
# Base URL of the scoring service. The rest of this file appends
# "/questions", "/submit" and "/files/<task_id>" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Advanced Agent Definition (No API Key Required) ---
class BasicAgent:
    def __init__(self, api_url):
        print("Advanced BasicAgent initialized with free HF models (no API key).")
        
        # Free HF model for text reasoning (Mistral-7B-Instruct)
        self.llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature": 0.1, "max_new_tokens": 500})
        
        # Free HF Inference Client for vision (e.g., Salesforce/blip-image-captioning-large)
        self.vision_client = InferenceClient(model="Salesforce/blip-image-captioning-large")
        
        # Tools for web search, code execution, and file processing
        self.search_tool = DuckDuckGoSearchRun(name="web_search", description="Search the web for information.")
        self.python_tool = PythonREPLTool(description="Execute Python code for calculations or data processing. Input should be valid Python code.")
        
        # Custom tool for processing files (downloads from API, handles images/PDFs/Excel/text)
        self.file_tool = Tool(
            name="process_file",
            func=self._process_file,
            description="Download and process a file associated with a task. Input format: 'task_id: <id>, file_name: <name>'"
        )
        
        self.tools = [self.search_tool, self.python_tool, self.file_tool]
        
        # React agent prompt template (inspired by GAIA prompting for exact answers)
        self.prompt_template = PromptTemplate.from_template("""
You are an expert AI agent solving GAIA benchmark questions. These questions require reasoning, tool use, and sometimes file processing.

Question: {question}

If the question mentions a file or attachment, use the 'process_file' tool with 'task_id: {task_id}, file_name: {file_name}'.

Reason step-by-step using tools as needed. Output ONLY the final answer in the exact format required by the question. No explanations, no extra text.

{agent_scratchpad}
""")
        
        self.agent = create_react_agent(self.llm, self.tools, self.prompt_template)
        self.executor = AgentExecutor(agent=self.agent, tools=self.tools, verbose=True, handle_parsing_errors=True, max_iterations=10)
        
        self.api_url = api_url

    def _process_file(self, input_str: str) -> str:
        """Internal function to download and process files without keys."""
        try:
            # Parse input
            parts = dict(part.strip().split(': ', 1) for part in input_str.split(', '))
            task_id = parts.get('task_id')
            file_name = parts.get('file_name')
            if not task_id or not file_name:
                return "Invalid input for process_file. Need 'task_id' and 'file_name'."
            
            # Download file
            file_url = f"{self.api_url}/files/{task_id}"
            response = requests.get(file_url, timeout=10)
            response.raise_for_status()
            
            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1]) as tmp:
                tmp.write(response.content)
                file_path = tmp.name
            
            ext = os.path.splitext(file_name)[1].lower()
            
            if ext in ['.jpg', '.png', '.jpeg', '.gif']:
                # Use free HF vision model to describe image
                with open(file_path, "rb") as img_file:
                    description = self.vision_client.image_to_text(image=img_file)
                os.unlink(file_path)
                return description
            
            elif ext == '.pdf':
                loader = PyPDFLoader(file_path)
                docs = loader.load()
                text = "\n\n".join(doc.page_content for doc in docs)
                os.unlink(file_path)
                return text[:20000]  # Truncate if too long
            
            elif ext in ['.xlsx', '.xls']:
                import pandas as pd
                df = pd.read_excel(file_path)
                os.unlink(file_path)
                return df.to_string()
            
            else:
                # Text file
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    text = f.read()
                os.unlink(file_path)
                return text[:20000]
        
        except Exception as e:
            return f"Error processing file: {str(e)}"

    def __call__(self, question: str, task_id: str, file_name: str | None = None) -> str:
        print(f"Agent processing question (first 50 chars): {question[:50]}... (task_id: {task_id}, file: {file_name})")
        input_prompt = question
        if file_name:
            input_prompt += f"\nThere is an attached file '{file_name}'. Use the 'process_file' tool with 'task_id: {task_id}, file_name: {file_name}' to access it."
        
        try:
            response = self.executor.invoke({"question": input_prompt})
            answer = response['output'].strip()
            print(f"Agent returning answer: {answer}")
            return answer
        except Exception as e:
            print(f"Error generating answer: {e}")
            return "Agent error occurred."

# Runs the agent on every fetched question (passing task_id and file_name) and submits the answers.
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every question from the scoring service and submit the batch.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per attempted question, or ``None`` when the run stops before
        any question is attempted (no login, agent init failure, fetch
        failure).
    """
    space_id = os.getenv("SPACE_ID")

    # Guard: nothing can be submitted without a logged-in user.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    base_url = DEFAULT_API_URL

    try:
        agent = BasicAgent(base_url)
    except Exception as init_error:
        return f"Error initializing agent: {init_error}", None

    # Link to this Space's code, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    try:
        fetched = requests.get(f"{base_url}/questions", timeout=15)
        fetched.raise_for_status()
        questions_data = fetched.json()
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as fetch_error:
        return f"Error fetching questions: {fetch_error}", None

    answers_payload = []
    results_log = []
    for entry in questions_data:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        file_name = entry.get("file_name")  # Adjust based on API response; assume it provides 'file_name' if attached
        if not (task_id and question_text):
            # Skip malformed entries.
            continue

        try:
            submitted_answer = agent(question_text, task_id, file_name)
        except Exception as agent_error:
            # Failed questions are logged but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {agent_error}"})
            continue

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        posted = requests.post(f"{base_url}/submit", json=submission_data, timeout=60)
        posted.raise_for_status()
        result_data = posted.json()
    except Exception as submit_error:
        return f"Submission Failed: {submit_error}", pd.DataFrame(results_log)

    try:
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as submit_error:
        return f"Submission Failed: {submit_error}", pd.DataFrame(results_log)

# --- Gradio Interface ---
# Top-level Gradio layout. `demo` is the app object launched under the
# __main__ guard at the bottom of the file.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Agent Evaluation Runner for GAIA (No API Key Required)")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to Hugging Face.
        2. Click 'Run Evaluation & Submit All Answers'.
        
        This agent uses free Hugging Face models (Mistral for text, BLIP for vision) with tools for search, code, and files to aim for 30-50%+ scores without any API keys.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # The two components below receive run_and_submit_all's
    # (status message, results table) return pair, in that order.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No `inputs` are listed; the handler's `profile` argument is presumably
    # injected by Gradio from the login session — confirm against Gradio's
    # OAuth documentation.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)