Update app.py
Browse files
app.py
CHANGED
@@ -7,10 +7,12 @@ from PIL import Image, ImageDraw, ImageFont
|
|
7 |
from huggingface_hub import login
|
8 |
import requests
|
9 |
import json
|
10 |
-
import base64
|
11 |
-
import re
|
12 |
import time
|
13 |
-
import pandas as pd
|
|
|
|
|
14 |
# Attempt to login using environment token
|
15 |
try:
|
16 |
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
@@ -155,38 +157,21 @@ def get_gaia_api_questions():
|
|
155 |
return None, f"An unexpected error occurred: {e}"
|
156 |
|
157 |
def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
|
158 |
-
"""
|
159 |
-
Fetches the content of the primary file associated with a task_id from the GAIA API.
|
160 |
-
Returns raw_bytes, detected_mime_type, and file_name.
|
161 |
-
associated_file_metadata_list is the 'files' list from the question data.
|
162 |
-
"""
|
163 |
-
# If no metadata, assume no file to fetch for this specialized getter.
|
164 |
-
# Or, if the API always serves THE file for task_id, then metadata is just for info.
|
165 |
-
# Let's assume the API /files/{task_id} always gives the relevant file if one exists for the task.
|
166 |
-
|
167 |
file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
|
168 |
print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")
|
169 |
-
|
170 |
try:
|
171 |
response = requests.get(file_url, timeout=30)
|
172 |
-
response.raise_for_status()
|
173 |
-
|
174 |
raw_bytes = response.content
|
175 |
detected_mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
|
176 |
-
|
177 |
-
# Try to get a filename from metadata if available, otherwise default
|
178 |
-
file_name = "attached_file"
|
179 |
if associated_file_metadata_list and isinstance(associated_file_metadata_list, list) and len(associated_file_metadata_list) > 0:
|
180 |
-
# Assuming the first file in metadata is the one fetched, or provides its name
|
181 |
first_file_meta = associated_file_metadata_list[0]
|
182 |
if isinstance(first_file_meta, dict) and 'file_name' in first_file_meta:
|
183 |
file_name = first_file_meta['file_name']
|
184 |
-
|
185 |
print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
|
186 |
return raw_bytes, detected_mime_type, file_name
|
187 |
-
|
188 |
except requests.exceptions.HTTPError as http_err:
|
189 |
-
# Specifically handle 404 for "no file" vs other errors
|
190 |
if http_err.response.status_code == 404:
|
191 |
print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
|
192 |
else:
|
@@ -199,136 +184,225 @@ def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata
|
|
199 |
print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
|
200 |
return None, None, None
|
201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
else:
|
212 |
-
|
|
|
213 |
|
214 |
-
# Step 2: Remove wrapping quotes if any
|
215 |
-
if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
|
216 |
-
answer = answer[1:-1].strip()
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
|
|
|
221 |
|
222 |
-
|
223 |
-
answer = re.sub(r'\s*,\s*', ',', answer)
|
224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
return answer
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
print(f"Agent (GAIA-Grounded Gemini) processing Task ID: {task_id}, Question: {question}")
|
232 |
-
if files_metadata: # This is the list of file metadata dicts
|
233 |
-
print(f"File metadata associated with this task: {files_metadata}")
|
234 |
|
235 |
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
236 |
if not gemini_api_key:
|
237 |
-
print("Error: GEMINI_API_KEY not found in environment variables. Please set it in Space Secrets.")
|
238 |
return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"
|
239 |
|
240 |
-
# --- GAIA-specific System Prompt ---
|
241 |
-
# Adapted from Figure 2 of GAIA Paper [cite: 103, 104, 105, 106, 107, 108]
|
242 |
system_prompt_lines = [
|
243 |
"You are a general AI assistant. I will ask you a question.",
|
244 |
-
"
|
245 |
-
"
|
246 |
-
"
|
247 |
-
"If
|
|
|
|
|
248 |
"If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
|
249 |
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
|
250 |
-
"
|
251 |
-
"If external files are mentioned or provided, use their content if relevant and accessible to answer the question.",
|
252 |
]
|
253 |
-
# We won't send this as a separate "system" message in Gemini's typical API structure,
|
254 |
-
# but rather prepend it to the user question for a single turn.
|
255 |
|
256 |
-
|
257 |
gemini_parts = []
|
258 |
-
|
259 |
-
# Prepend system prompt guidelines to the main question text part
|
260 |
-
user_question_text = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"
|
261 |
|
262 |
-
# --- File Handling ---
|
|
|
263 |
file_content_bytes, detected_mime_type, file_name = None, None, None
|
264 |
-
|
|
|
265 |
file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)
|
266 |
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
"
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
-
###########################################################################################################################
|
314 |
-
else: # No file content fetched or no files associated
|
315 |
-
gemini_parts.append({"text": user_question_text})
|
316 |
|
317 |
payload = {
|
318 |
"contents": [{"role": "user", "parts": gemini_parts}],
|
319 |
-
"generationConfig": {
|
320 |
-
"temperature": 0.2, # Lower temperature for more factual/deterministic GAIA answers
|
321 |
-
"maxOutputTokens": 300, # Increased slightly for potentially more complex answers
|
322 |
-
}
|
323 |
}
|
324 |
-
|
325 |
api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
|
326 |
agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"
|
327 |
|
328 |
try:
|
329 |
headers = {"Content-Type": "application/json"}
|
330 |
-
print(f"Calling Gemini API for task {task_id}
|
331 |
-
response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=
|
332 |
response.raise_for_status()
|
333 |
result = response.json()
|
334 |
|
@@ -336,34 +410,23 @@ def my_agent_logic(task_id: str, question: str, files_metadata: list = None): #
|
|
336 |
result["candidates"][0].get("content") and
|
337 |
result["candidates"][0]["content"].get("parts") and
|
338 |
result["candidates"][0]["content"]["parts"][0].get("text")):
|
339 |
-
|
340 |
-
|
341 |
-
print(f"Raw Gemini output: {raw_answer}") # Debugging log
|
342 |
-
agent_computed_answer = clean_final_answer(raw_answer)
|
343 |
-
|
344 |
else:
|
345 |
-
print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {result}")
|
346 |
if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
|
347 |
block_reason = result["promptFeedback"]["blockReason"]
|
348 |
-
print(f"Gemini API blocked the prompt for task {task_id}. Reason: {block_reason}")
|
349 |
agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
|
350 |
else:
|
351 |
agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
|
352 |
except requests.exceptions.Timeout:
|
353 |
-
print(f"Timeout error calling Gemini API for task {task_id}.")
|
354 |
agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
|
355 |
except requests.exceptions.RequestException as e:
|
356 |
-
print(f"
|
357 |
-
if e.response is not None:
|
358 |
-
print(f"Gemini API Error Response Status: {e.response.status_code}")
|
359 |
-
try: print(f"Gemini API Error Response Body: {e.response.json()}")
|
360 |
-
except json.JSONDecodeError: print(f"Gemini API Error Response Body (text): {e.response.text}")
|
361 |
agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
|
362 |
except Exception as e:
|
363 |
-
print(f"An unexpected error occurred in my_agent_logic for task {task_id}: {e}")
|
364 |
agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
|
365 |
-
|
366 |
-
print(f"Agent (GAIA-Grounded Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
|
367 |
return agent_computed_answer
|
368 |
|
369 |
def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
|
@@ -389,20 +452,25 @@ def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
|
|
389 |
log_messages.append(f"Processing 1 random question based on user choice.")
|
390 |
elif run_all_questions:
|
391 |
log_messages.append(f"Processing all {len(tasks_to_process)} questions.")
|
|
|
|
|
|
|
|
|
|
|
392 |
for task in tasks_to_process:
|
393 |
task_id = task.get("task_id")
|
394 |
question = task.get("question")
|
395 |
-
associated_files_metadata = task.get("files", [])
|
396 |
if task_id and question:
|
397 |
log_messages.append(f"\nProcessing Task ID: {task_id}")
|
398 |
log_messages.append(f"Question: {question}")
|
399 |
if associated_files_metadata:
|
400 |
log_messages.append(f"Associated files metadata: {associated_files_metadata}")
|
401 |
-
# Pass the files_metadata to the agent logic
|
402 |
submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
|
403 |
log_messages.append(f"Agent's Answer: {submitted_answer}")
|
404 |
answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
405 |
-
|
|
|
406 |
else:
|
407 |
log_messages.append(f"Skipping malformed task: {task}")
|
408 |
if not answers_to_submit:
|
@@ -461,7 +529,7 @@ def submit_agent_answers(profile: gr.OAuthProfile, answers_for_submission_state)
|
|
461 |
submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
|
462 |
return "\n".join(submission_log_messages)
|
463 |
|
464 |
-
# --- Gradio Interface (largely unchanged
|
465 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
466 |
gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
|
467 |
gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
|
@@ -474,7 +542,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
474 |
with gr.Tabs():
|
475 |
with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
|
476 |
gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
|
477 |
-
gr.Markdown("This agent uses the Gemini API
|
478 |
run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
|
479 |
run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
|
480 |
gr.Markdown("### Agent Run Log & Generated Answers:")
|
|
|
7 |
from huggingface_hub import login
|
8 |
import requests
|
9 |
import json
|
10 |
+
import base64
|
11 |
+
import re # For advanced string cleaning
|
12 |
import time
|
13 |
+
import pandas as pd # For spreadsheet handling
|
14 |
+
from io import StringIO # For capturing print output from exec
|
15 |
+
|
16 |
# Attempt to login using environment token
|
17 |
try:
|
18 |
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
157 |
return None, f"An unexpected error occurred: {e}"
|
158 |
|
159 |
def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
|
161 |
print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")
|
|
|
162 |
try:
|
163 |
response = requests.get(file_url, timeout=30)
|
164 |
+
response.raise_for_status()
|
|
|
165 |
raw_bytes = response.content
|
166 |
detected_mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
|
167 |
+
file_name = "attached_file" # Default
|
|
|
|
|
168 |
if associated_file_metadata_list and isinstance(associated_file_metadata_list, list) and len(associated_file_metadata_list) > 0:
|
|
|
169 |
first_file_meta = associated_file_metadata_list[0]
|
170 |
if isinstance(first_file_meta, dict) and 'file_name' in first_file_meta:
|
171 |
file_name = first_file_meta['file_name']
|
|
|
172 |
print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
|
173 |
return raw_bytes, detected_mime_type, file_name
|
|
|
174 |
except requests.exceptions.HTTPError as http_err:
|
|
|
175 |
if http_err.response.status_code == 404:
|
176 |
print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
|
177 |
else:
|
|
|
184 |
print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
|
185 |
return None, None, None
|
186 |
|
187 |
+
def execute_python_code(code_string: str):
    """Execute a string of Python code and capture what it prints.

    WARNING: this is NOT a sandbox. The code runs in-process through ``exec``
    with full access to builtins, imports, the filesystem and the network.
    Only feed it sources you trust, or isolate the whole process.

    The script is run as if it were ``__main__`` in a single fresh namespace,
    so top-level functions can call one another and
    ``if __name__ == "__main__":`` guards are honoured.

    Args:
        code_string: Python source code to execute.

    Returns:
        A ``(output, error)`` tuple, selected in this priority order:

        1. ``(str(final_answer), None)`` if the script finished without an
           exception and defined a ``final_answer`` variable whose string
           form is non-empty.
        2. ``(printed_output, None)`` if the script wrote anything to stdout
           (this also applies when it raised after printing).
        3. ``(None, "Execution Error: <Type>: <message>")`` if the script
           raised and printed nothing.
        4. ``(<placeholder message>, None)`` if nothing was produced at all.
    """
    # Local imports keep this helper self-contained: it must not depend on
    # another function having imported ``sys``/``io`` into module globals.
    import contextlib
    import io

    print(f"Attempting to execute Python code:\n{code_string[:500]}...")  # Log first 500 chars

    captured_output = io.StringIO()
    execution_result = None
    error_message = None

    # One dict serves as both globals and locals. With separate dicts,
    # top-level definitions land in "locals" while function bodies resolve
    # names through "globals", so helpers cannot see each other.
    # exec() inserts ``__builtins__`` into this dict automatically.
    namespace = {"__name__": "__main__"}

    try:
        # redirect_stdout restores the previous stream on exit, even on error.
        with contextlib.redirect_stdout(captured_output):
            exec(code_string, namespace)
    except SystemExit:
        # A script ending with sys.exit()/exit() is a normal termination and
        # must not propagate and tear down the caller.
        pass
    except Exception as e:
        error_message = f"Execution Error: {type(e).__name__}: {e}"
        # Logged only after stdout is restored, so the message reaches the
        # real log instead of being mistaken for script output.
        print(f"Error executing Python code: {e}")

    # Some scripts expose their result through a 'final_answer' variable.
    if error_message is None and "final_answer" in namespace:
        execution_result = str(namespace["final_answer"])

    printed_output = captured_output.getvalue().strip()

    if execution_result:
        # If 'final_answer' was found, prioritize it
        return execution_result, None
    if printed_output:
        # No 'final_answer', but something was printed: return that
        return printed_output, None
    if error_message:
        # Execution failed and produced no usable output
        return None, error_message
    # No 'final_answer', nothing printed, and no error (e.g. script only defines functions)
    return "Python code executed without explicit output or 'final_answer' variable.", None
|
233 |
|
|
|
|
|
|
|
234 |
|
235 |
+
def clean_final_answer(raw_text: str) -> str:
    """Reduce raw LLM output to a bare answer string.

    Steps, applied in order:

    1. Keep only what follows the first ``FINAL ANSWER:`` marker, if present
       (case-insensitive).
    2. Strip one leading conversational prefix such as "The answer is",
       plus a single ``:`` or ``.`` directly after it. A prefix ending in a
       letter only matches at a word boundary, so "The answer isn't known"
       is left intact.
    3. Remove one pair of matching wrapping quotes.
    4. Collapse whitespace around commas (``"a , b"`` -> ``"a,b"``).
    5. Drop a trailing period from numbers and short phrases. Text containing
       an interior dot ("U.S.A.", "v1.2.3.") and sentences longer than three
       words keep their period.

    Args:
        raw_text: Text returned by the model.

    Returns:
        The cleaned answer, or ``""`` when ``raw_text`` is not a string.
    """
    if not isinstance(raw_text, str):
        return ""  # Should not happen, but good to be safe

    answer = raw_text.strip()

    # Extract content after "FINAL ANSWER:" if the model still emitted it.
    final_answer_match = re.search(r"FINAL ANSWER:\s*(.*)", answer, re.IGNORECASE | re.DOTALL)
    if final_answer_match:
        answer = final_answer_match.group(1).strip()

    # Remove common conversational prefixes.
    common_prefixes = [
        "The answer is", "The final answer is", "So, the answer is", "Therefore, the answer is",
        "Based on the information, the answer is", "The correct answer is", "My answer is",
        "Okay, the answer is", "Sure, the answer is", "Here is the answer:", "The solution is",
        "Answer:", "Result:"
    ]
    lowered = answer.lower()
    for prefix in common_prefixes:
        if not lowered.startswith(prefix.lower()):
            continue
        remainder = answer[len(prefix):]
        # A prefix that ends in a letter must end on a word boundary;
        # otherwise "The answer isn't ..." would be cut to "n't ...".
        if prefix[-1].isalnum() and remainder[:1] and (remainder[0].isalnum() or remainder[0] == "'"):
            continue
        answer = remainder.strip()
        # Remove potential colon or period after prefix
        if answer.startswith(":") or answer.startswith("."):
            answer = answer[1:].strip()
        break  # Stop after first prefix match

    # Remove wrapping quotes (single or double)
    if len(answer) >= 2:
        if (answer.startswith('"') and answer.endswith('"')) or \
           (answer.startswith("'") and answer.endswith("'")):
            answer = answer[1:-1].strip()

    # Units such as $ or % are deliberately left alone: some questions require
    # them, and the prompt already instructs the model on when to omit them.

    # Normalize spaces around commas for comma-separated lists
    answer = re.sub(r'\s*,\s*', ',', answer)

    # Remove trailing punctuation if it seems unintended (e.g. a lone period)
    if len(answer) > 1 and answer.endswith("."):
        body = answer[:-1].strip()
        if re.fullmatch(r"[-+]?\d+(?:\.\d+)?", body):
            # A plain number followed by a sentence period ("3.14.", "42."):
            # the interior-dot guard below must not protect it.
            answer = body
        elif re.search(r"[a-zA-Z0-9]\.[a-zA-Z0-9]", answer):
            pass  # Interior dots (abbreviations, versions): keep as written
        elif len(body.split()) > 3:
            pass  # Likely a full sentence, keep period
        else:
            answer = body

    return answer
|
289 |
+
|
290 |
+
def my_agent_logic(task_id: str, question: str, files_metadata: list = None):
|
291 |
+
print(f"Agent (Enhanced Tools + Gemini) processing Task ID: {task_id}, Question: {question}")
|
292 |
+
if files_metadata:
|
293 |
+
print(f"File metadata associated: {files_metadata}")
|
|
|
|
|
|
|
294 |
|
295 |
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
296 |
if not gemini_api_key:
|
|
|
297 |
return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"
|
298 |
|
|
|
|
|
299 |
system_prompt_lines = [
|
300 |
"You are a general AI assistant. I will ask you a question.",
|
301 |
+
"Your primary goal is to provide the single, exact, concise, and factual answer to the question.",
|
302 |
+
"Do not include any conversational fluff, disclaimers, explanations, or any introductory phrases like 'The answer is:'. Your response should be ONLY the answer itself.",
|
303 |
+
"Do not use markdown formatting unless the question explicitly asks for it.",
|
304 |
+
"If the question implies a specific format (e.g., a number, a date, a comma-separated list), provide the answer in that format.",
|
305 |
+
"Do NOT include the phrase 'FINAL ANSWER:' in your response to me.",
|
306 |
+
"If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise by the question.",
|
307 |
"If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
|
308 |
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
|
309 |
+
"If external files or tool outputs are provided below, use their content if relevant and accessible to answer the question.",
|
|
|
310 |
]
|
|
|
|
|
311 |
|
312 |
+
user_question_text_for_gemini = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"
|
313 |
gemini_parts = []
|
|
|
|
|
|
|
314 |
|
315 |
+
# --- File & Tool Handling ---
|
316 |
+
tool_output_description = ""
|
317 |
file_content_bytes, detected_mime_type, file_name = None, None, None
|
318 |
+
|
319 |
+
if files_metadata:
|
320 |
file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)
|
321 |
|
322 |
+
if file_content_bytes:
|
323 |
+
if file_name and file_name.lower().endswith(".py") and detected_mime_type in ["text/x-python", "application/x-python-code", "text/plain"]:
|
324 |
+
print(f"Detected Python file: {file_name}")
|
325 |
+
try:
|
326 |
+
python_code = file_content_bytes.decode('utf-8')
|
327 |
+
execution_result, exec_error = execute_python_code(python_code)
|
328 |
+
if exec_error:
|
329 |
+
tool_output_description += f"\n\nExecution of Python file '{file_name}' failed: {exec_error}"
|
330 |
+
elif execution_result:
|
331 |
+
tool_output_description += f"\n\nOutput from executing Python file '{file_name}':\n{execution_result}"
|
332 |
+
else:
|
333 |
+
tool_output_description += f"\n\nPython file '{file_name}' executed without specific return or error."
|
334 |
+
except Exception as e_py_decode:
|
335 |
+
tool_output_description += f"\n\nError decoding Python file '{file_name}': {e_py_decode}"
|
336 |
+
|
337 |
+
elif detected_mime_type and detected_mime_type.startswith("image/"):
|
338 |
+
try:
|
339 |
+
base64_image = base64.b64encode(file_content_bytes).decode('utf-8')
|
340 |
+
gemini_parts.append({"inline_data": {"mime_type": detected_mime_type, "data": base64_image}})
|
341 |
+
tool_output_description += f"\n\nAn image file '{file_name}' ({detected_mime_type}) is provided. Refer to it if relevant."
|
342 |
+
print(f"Added image {file_name} to Gemini parts for task {task_id}.")
|
343 |
+
except Exception as e_img:
|
344 |
+
tool_output_description += f"\n\n[Agent note: Error processing image file '{file_name}': {e_img}]"
|
345 |
+
|
346 |
+
elif detected_mime_type and detected_mime_type.startswith("audio/"): # mp3, m4a, wav, etc.
|
347 |
+
try:
|
348 |
+
base64_audio = base64.b64encode(file_content_bytes).decode('utf-8')
|
349 |
+
gemini_parts.append({"inline_data": {"mime_type": detected_mime_type, "data": base64_audio}})
|
350 |
+
tool_output_description += f"\n\nAn audio file '{file_name}' ({detected_mime_type}) is provided. Transcribe or analyze it if relevant to the question."
|
351 |
+
print(f"Added audio {file_name} to Gemini parts for task {task_id}.")
|
352 |
+
except Exception as e_audio:
|
353 |
+
tool_output_description += f"\n\n[Agent note: Error processing audio file '{file_name}': {e_audio}]"
|
354 |
+
|
355 |
+
elif detected_mime_type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel", "text/csv"]:
|
356 |
+
try:
|
357 |
+
if "csv" in detected_mime_type: df = pd.read_csv(io.BytesIO(file_content_bytes))
|
358 |
+
else: df = pd.read_excel(io.BytesIO(file_content_bytes))
|
359 |
+
|
360 |
+
# Provide a more comprehensive preview
|
361 |
+
preview_rows = min(10, len(df))
|
362 |
+
preview_cols = min(5, len(df.columns))
|
363 |
+
preview_df = df.iloc[:preview_rows, :preview_cols]
|
364 |
+
df_description = f"First {preview_rows} rows and first {preview_cols} columns (if available):\n{preview_df.to_string(index=True)}\nTotal rows: {len(df)}, Total columns: {len(df.columns)}."
|
365 |
+
if len(df.columns) > preview_cols:
|
366 |
+
df_description += f"\nOther columns include: {list(df.columns[preview_cols:])}"
|
367 |
+
|
368 |
+
tool_output_description += f"\n\nData from spreadsheet '{file_name}':\n{df_description}"
|
369 |
+
print(f"Added spreadsheet preview for {file_name} to tool output description.")
|
370 |
+
except Exception as e_xls:
|
371 |
+
tool_output_description += f"\n\n[Agent note: Unable to parse spreadsheet '{file_name}': {e_xls}]"
|
372 |
+
|
373 |
+
elif detected_mime_type == "text/plain":
|
374 |
+
try:
|
375 |
+
text_content = file_content_bytes.decode('utf-8')
|
376 |
+
tool_output_description += f"\n\nContent of attached text file '{file_name}':\n{text_content[:2000]}" # Limit length
|
377 |
+
print(f"Added text file content '{file_name}' to tool output description.")
|
378 |
+
except Exception as e_txt:
|
379 |
+
tool_output_description += f"\n\n[Agent note: A text file '{file_name}' was associated but could not be decoded: {e_txt}]"
|
380 |
+
else:
|
381 |
+
tool_output_description += f"\n\nNote: A file named '{file_name}' (type: {detected_mime_type or 'unknown'}) is associated. Its content could not be directly processed by current tools."
|
382 |
+
elif files_metadata : # File metadata exists but no bytes fetched (e.g. 404)
|
383 |
+
tool_output_description += f"\n\nNote: File(s) {files_metadata} were listed for this task, but could not be fetched or processed."
|
384 |
+
|
385 |
|
386 |
+
# Append the main question and any tool/file processing notes as a single text part if no multimodal data was added yet,
|
387 |
+
# or as the first text part if multimodal data (image/audio) is present.
|
388 |
+
final_user_text_for_gemini = user_question_text_for_gemini + tool_output_description
|
389 |
+
if not any(p.get("inline_data") for p in gemini_parts): # If no image/audio was added
|
390 |
+
gemini_parts.append({"text": final_user_text_for_gemini})
|
391 |
+
else: # If image/audio was added, insert text part at the beginning
|
392 |
+
gemini_parts.insert(0, {"text": final_user_text_for_gemini})
|
393 |
|
|
|
|
|
|
|
394 |
|
395 |
payload = {
|
396 |
"contents": [{"role": "user", "parts": gemini_parts}],
|
397 |
+
"generationConfig": {"temperature": 0.1, "maxOutputTokens": 350} # Very low temp for GAIA
|
|
|
|
|
|
|
398 |
}
|
|
|
399 |
api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
|
400 |
agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"
|
401 |
|
402 |
try:
|
403 |
headers = {"Content-Type": "application/json"}
|
404 |
+
print(f"Calling Gemini API for task {task_id} with payload structure: {[(k, type(v)) for p in payload['contents'] for part in p['parts'] for k,v in part.items()]}")
|
405 |
+
response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=90) # Increased timeout slightly
|
406 |
response.raise_for_status()
|
407 |
result = response.json()
|
408 |
|
|
|
410 |
result["candidates"][0].get("content") and
|
411 |
result["candidates"][0]["content"].get("parts") and
|
412 |
result["candidates"][0]["content"]["parts"][0].get("text")):
|
413 |
+
raw_answer_from_gemini = result["candidates"][0]["content"]["parts"][0]["text"].strip()
|
414 |
+
agent_computed_answer = clean_final_answer(raw_answer_from_gemini)
|
|
|
|
|
|
|
415 |
else:
|
416 |
+
print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {json.dumps(result, indent=2)}")
|
417 |
if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
|
418 |
block_reason = result["promptFeedback"]["blockReason"]
|
|
|
419 |
agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
|
420 |
else:
|
421 |
agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
|
422 |
except requests.exceptions.Timeout:
|
|
|
423 |
agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
|
424 |
except requests.exceptions.RequestException as e:
|
425 |
+
if e.response is not None: print(f"Gemini API Error Response Status: {e.response.status_code}, Body: {e.response.text}")
|
|
|
|
|
|
|
|
|
426 |
agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
|
427 |
except Exception as e:
|
|
|
428 |
agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
|
429 |
+
print(f"Agent (Enhanced Tools + Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
|
|
|
430 |
return agent_computed_answer
|
431 |
|
432 |
def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
|
|
|
452 |
log_messages.append(f"Processing 1 random question based on user choice.")
|
453 |
elif run_all_questions:
|
454 |
log_messages.append(f"Processing all {len(tasks_to_process)} questions.")
|
455 |
+
|
456 |
+
# Need to import sys for execute_python_code's stdout capture
|
457 |
+
global sys
|
458 |
+
import sys
|
459 |
+
|
460 |
for task in tasks_to_process:
|
461 |
task_id = task.get("task_id")
|
462 |
question = task.get("question")
|
463 |
+
associated_files_metadata = task.get("files", [])
|
464 |
if task_id and question:
|
465 |
log_messages.append(f"\nProcessing Task ID: {task_id}")
|
466 |
log_messages.append(f"Question: {question}")
|
467 |
if associated_files_metadata:
|
468 |
log_messages.append(f"Associated files metadata: {associated_files_metadata}")
|
|
|
469 |
submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
|
470 |
log_messages.append(f"Agent's Answer: {submitted_answer}")
|
471 |
answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
472 |
+
if run_all_questions: # Add a small delay if processing all questions to be kind to APIs
|
473 |
+
time.sleep(1) # 1-second delay between processing each question
|
474 |
else:
|
475 |
log_messages.append(f"Skipping malformed task: {task}")
|
476 |
if not answers_to_submit:
|
|
|
529 |
submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
|
530 |
return "\n".join(submission_log_messages)
|
531 |
|
532 |
+
# --- Gradio Interface (largely unchanged) ---
|
533 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
534 |
gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
|
535 |
gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
|
|
|
542 |
with gr.Tabs():
|
543 |
with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
|
544 |
gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
|
545 |
+
gr.Markdown("This agent uses the Gemini API with enhanced tool handling (Python execution, audio, spreadsheets) to generate answers.")
|
546 |
run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
|
547 |
run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
|
548 |
gr.Markdown("### Agent Run Log & Generated Answers:")
|