nismamjad committed
Commit 00aeb49 · verified · 1 Parent(s): 8d5df3d

Update app.py

Files changed (1): app.py (+207 -139)
app.py CHANGED
@@ -7,10 +7,12 @@ from PIL import Image, ImageDraw, ImageFont
  from huggingface_hub import login
  import requests
  import json
- import base64 # <-- ADDED IMPORT for image handling
- import re
  import time
- import pandas as pd
  # Attempt to login using environment token
  try:
      HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
@@ -155,38 +157,21 @@ def get_gaia_api_questions():
          return None, f"An unexpected error occurred: {e}"

  def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
-     """
-     Fetches the content of the primary file associated with a task_id from the GAIA API.
-     Returns raw_bytes, detected_mime_type, and file_name.
-     associated_file_metadata_list is the 'files' list from the question data.
-     """
-     # If no metadata, assume no file to fetch for this specialized getter.
-     # Or, if the API always serves THE file for task_id, then metadata is just for info.
-     # Let's assume the API /files/{task_id} always gives the relevant file if one exists for the task.
-
      file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
      print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")
-
      try:
          response = requests.get(file_url, timeout=30)
-         response.raise_for_status() # This will error if file not found (404) or other issues
-
          raw_bytes = response.content
          detected_mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
-
-         # Try to get a filename from metadata if available, otherwise default
-         file_name = "attached_file"
          if associated_file_metadata_list and isinstance(associated_file_metadata_list, list) and len(associated_file_metadata_list) > 0:
-             # Assuming the first file in metadata is the one fetched, or provides its name
              first_file_meta = associated_file_metadata_list[0]
              if isinstance(first_file_meta, dict) and 'file_name' in first_file_meta:
                  file_name = first_file_meta['file_name']
-
          print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
          return raw_bytes, detected_mime_type, file_name
-
      except requests.exceptions.HTTPError as http_err:
-         # Specifically handle 404 for "no file" vs other errors
          if http_err.response.status_code == 404:
              print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
          else:
@@ -199,136 +184,225 @@ def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
          print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
      return None, None, None

- def clean_final_answer(raw_text):
-     # Step 1: Extract FINAL ANSWER section if present
-     if "FINAL ANSWER:" in raw_text.upper():
-         match = re.search(r"FINAL ANSWER:\s*(.*)", raw_text, re.IGNORECASE | re.DOTALL)
-         if match:
-             answer = match.group(1).strip()
-         else:
-             answer = raw_text.strip()
      else:
-         answer = raw_text.strip()
-
-     # Step 2: Remove wrapping quotes if any
-     if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
-         answer = answer[1:-1].strip()
-
-     # Step 3: Remove unwanted units unless explicitly required
-     # You may adjust this depending on the task
-     answer = answer.replace('%', '').replace('$', '').strip()
-
-     # Step 4: Normalize spaces (e.g., for comma-separated answers)
-     answer = re.sub(r'\s*,\s*', ',', answer)
-
      return answer
- def my_agent_logic(task_id: str, question: str, files_metadata: list = None): # files_metadata is the list from task.get("files")
-     """
-     Uses the Gemini API, with GAIA-specific prompting and basic file handling,
-     to generate an answer for the given question.
-     """
-     print(f"Agent (GAIA-Grounded Gemini) processing Task ID: {task_id}, Question: {question}")
-     if files_metadata: # This is the list of file metadata dicts
-         print(f"File metadata associated with this task: {files_metadata}")

      gemini_api_key = os.environ.get("GEMINI_API_KEY")
      if not gemini_api_key:
-         print("Error: GEMINI_API_KEY not found in environment variables. Please set it in Space Secrets.")
          return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"

-     # --- GAIA-specific System Prompt ---
-     # Adapted from Figure 2 of GAIA Paper [cite: 103, 104, 105, 106, 107, 108]
      system_prompt_lines = [
          "You are a general AI assistant. I will ask you a question.",
-         "Report your thoughts (for your own processing, not for the final answer), and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].", # Instructing the LLM about the template it should "think" in
-         "However, your actual returned response to me (the user) should ONLY be [YOUR FINAL ANSWER] part, without the 'FINAL ANSWER:' prefix.", # Clarification for our use case
-         "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.",
-         "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.",
          "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
          "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
-         "Be precise and ensure the answer strictly adheres to any format requested in the question.",
-         "If external files are mentioned or provided, use their content if relevant and accessible to answer the question.",
      ]
-     # We won't send this as a separate "system" message in Gemini's typical API structure,
-     # but rather prepend it to the user question for a single turn.

-     # --- Prepare parts for Gemini API payload ---
      gemini_parts = []
-
-     # Prepend system prompt guidelines to the main question text part
-     user_question_text = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"

-     # --- File Handling ---
      file_content_bytes, detected_mime_type, file_name = None, None, None
-     if files_metadata: # If the question has associated file(s) metadata
          file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)

-     if file_content_bytes:
-         if detected_mime_type and detected_mime_type.startswith("image/"): # Handle images
-             try:
-                 base64_image = base64.b64encode(file_content_bytes).decode('utf-8')
-                 gemini_parts.append({"text": user_question_text}) # Question text first
-                 gemini_parts.append({
-                     "inline_data": {
-                         "mime_type": detected_mime_type,
-                         "data": base64_image
-                     }
-                 })
-                 print(f"Added image {file_name} ({detected_mime_type}) to Gemini prompt for task {task_id}.")
-             except Exception as e_img:
-                 print(f"Error processing image file {file_name} for task {task_id}: {e_img}")
-                 gemini_parts.append({"text": user_question_text + f"\n[Agent note: An image file '{file_name}' was associated but could not be processed: {e_img}]"})
-         elif detected_mime_type and detected_mime_type == "text/plain": # Handle plain text files
-             try:
-                 text_content = file_content_bytes.decode('utf-8')
-                 user_question_text += f"\n\nContent of attached text file '{file_name}':\n{text_content}"
-                 gemini_parts.append({"text": user_question_text})
-                 print(f"Added text file content '{file_name}' to Gemini prompt for task {task_id}.")
-             except Exception as e_txt:
-                 print(f"Error decoding text file {file_name} for task {task_id}: {e_txt}")
-                 gemini_parts.append({"text": user_question_text + f"\n[Agent note: A text file '{file_name}' was associated but could not be decoded: {e_txt}]"})
-         ###########################################################################################################################
-         elif detected_mime_type in [
-             "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-             "application/vnd.ms-excel",
-             "text/csv"]:
-             try:
-                 from io import BytesIO
-                 if "csv" in detected_mime_type:
-                     df = pd.read_csv(BytesIO(file_content_bytes))
-                 else:
-                     df = pd.read_excel(BytesIO(file_content_bytes))
-
-                 preview = df.head(10).to_string(index=False)
-                 user_question_text += f"\n\nContent preview from the attached spreadsheet '{file_name}':\n{preview}"
-                 gemini_parts.append({"text": user_question_text})
-                 print(f"Added spreadsheet content preview for {file_name} to Gemini prompt.")
-             except Exception as e_xls:
-                 print(f"Error reading spreadsheet file {file_name} for task {task_id}: {e_xls}")
-                 user_question_text += f"\n\n[Agent note: Unable to parse spreadsheet '{file_name}': {e_xls}]"
-                 gemini_parts.append({"text": user_question_text})

-         ###########################################################################################################################
-         else: # No file content fetched or no files associated
-             gemini_parts.append({"text": user_question_text})

      payload = {
          "contents": [{"role": "user", "parts": gemini_parts}],
-         "generationConfig": {
-             "temperature": 0.2, # Lower temperature for more factual/deterministic GAIA answers
-             "maxOutputTokens": 300, # Increased slightly for potentially more complex answers
-         }
      }
-
      api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
      agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"

      try:
          headers = {"Content-Type": "application/json"}
-         print(f"Calling Gemini API for task {task_id}...")
-         response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=60)
          response.raise_for_status()
          result = response.json()
@@ -336,34 +410,23 @@ def my_agent_logic(task_id: str, question: str, files_metadata: list = None):
              result["candidates"][0].get("content") and
              result["candidates"][0]["content"].get("parts") and
              result["candidates"][0]["content"]["parts"][0].get("text")):
-
-             raw_answer = result["candidates"][0]["content"]["parts"][0]["text"].strip()
-             print(f"Raw Gemini output: {raw_answer}") # Debugging log
-             agent_computed_answer = clean_final_answer(raw_answer)
-
          else:
-             print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {result}")
              if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
                  block_reason = result["promptFeedback"]["blockReason"]
-                 print(f"Gemini API blocked the prompt for task {task_id}. Reason: {block_reason}")
                  agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
              else:
                  agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
      except requests.exceptions.Timeout:
-         print(f"Timeout error calling Gemini API for task {task_id}.")
          agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
      except requests.exceptions.RequestException as e:
-         print(f"Error calling Gemini API for task {task_id}: {e}")
-         if e.response is not None:
-             print(f"Gemini API Error Response Status: {e.response.status_code}")
-             try: print(f"Gemini API Error Response Body: {e.response.json()}")
-             except json.JSONDecodeError: print(f"Gemini API Error Response Body (text): {e.response.text}")
          agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
      except Exception as e:
-         print(f"An unexpected error occurred in my_agent_logic for task {task_id}: {e}")
          agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
-
-     print(f"Agent (GAIA-Grounded Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
      return agent_computed_answer

  def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
@@ -389,20 +452,25 @@ def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
          log_messages.append(f"Processing 1 random question based on user choice.")
      elif run_all_questions:
          log_messages.append(f"Processing all {len(tasks_to_process)} questions.")
      for task in tasks_to_process:
          task_id = task.get("task_id")
          question = task.get("question")
-         associated_files_metadata = task.get("files", []) # This is the list of file metadata dicts
          if task_id and question:
              log_messages.append(f"\nProcessing Task ID: {task_id}")
              log_messages.append(f"Question: {question}")
              if associated_files_metadata:
                  log_messages.append(f"Associated files metadata: {associated_files_metadata}")
-             # Pass the files_metadata to the agent logic
              submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
              log_messages.append(f"Agent's Answer: {submitted_answer}")
              answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             time.sleep(2)
          else:
              log_messages.append(f"Skipping malformed task: {task}")
      if not answers_to_submit:
@@ -461,7 +529,7 @@ def submit_agent_answers(profile: gr.OAuthProfile, answers_for_submission_state):
          submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
      return "\n".join(submission_log_messages)

- # --- Gradio Interface (largely unchanged from your latest version) ---
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
      gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
@@ -474,7 +542,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      with gr.Tabs():
          with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
              gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
-             gr.Markdown("This agent uses the Gemini API (with GAIA-specific prompting and basic file handling) to generate answers.")
              run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
              run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
              gr.Markdown("### Agent Run Log & Generated Answers:")

  from huggingface_hub import login
  import requests
  import json
+ import base64
+ import re # For advanced string cleaning
  import time
+ import pandas as pd # For spreadsheet handling
+ import io # Needed: io.BytesIO is referenced in the spreadsheet handler below but io was not otherwise imported
+ from io import StringIO # For capturing print output from exec
+
  # Attempt to login using environment token
  try:
      HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

          return None, f"An unexpected error occurred: {e}"

  def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
      file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
      print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")
      try:
          response = requests.get(file_url, timeout=30)
+         response.raise_for_status()
          raw_bytes = response.content
          detected_mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
+         file_name = "attached_file" # Default
          if associated_file_metadata_list and isinstance(associated_file_metadata_list, list) and len(associated_file_metadata_list) > 0:
              first_file_meta = associated_file_metadata_list[0]
              if isinstance(first_file_meta, dict) and 'file_name' in first_file_meta:
                  file_name = first_file_meta['file_name']
          print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
          return raw_bytes, detected_mime_type, file_name
      except requests.exceptions.HTTPError as http_err:
          if http_err.response.status_code == 404:
              print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
          else:
 
          print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
      return None, None, None

+ def execute_python_code(code_string: str):
+     """
+     Executes a string of Python code (not sandboxed) and captures its standard output.
+     Returns the captured output or an error message.
+     """
+     print(f"Attempting to execute Python code:\n{code_string[:500]}...") # Log first 500 chars
+     # Create a new StringIO object to capture stdout
+     old_stdout = sys.stdout
+     sys.stdout = captured_output = StringIO()
+
+     execution_result = None
+     error_message = None

+     try:
+         # Execute the code in a dedicated namespace
+         # For safety, you might want to further restrict the available builtins/modules
+         # For this benchmark, we assume the provided Python code is generally safe.
+         local_namespace = {}
+         exec(code_string, {"__builtins__": __builtins__}, local_namespace)
+
+         # Try to get a 'final_answer' variable if it exists, as some questions might expect it
+         if 'final_answer' in local_namespace:
+             execution_result = str(local_namespace['final_answer'])
+
+     except Exception as e:
+         print(f"Error executing Python code: {e}")
+         error_message = f"Execution Error: {type(e).__name__}: {e}"
+     finally:
+         # Restore stdout
+         sys.stdout = old_stdout
+
+     # Get the content of captured_output
+     printed_output = captured_output.getvalue().strip()
+
+     if execution_result:
+         # If 'final_answer' was found, prioritize it
+         return execution_result, None
+     elif printed_output:
+         # If 'final_answer' not found, but something was printed, return that
+         return printed_output, None
+     elif error_message:
+         # If there was an error during execution
+         return None, error_message
      else:
+         # If no 'final_answer', nothing printed, and no error (e.g., script only defines functions)
+         return "Python code executed without explicit output or 'final_answer' variable.", None

+ def clean_final_answer(raw_text: str) -> str:
+     """More robustly cleans the raw text output from the LLM."""
+     if not isinstance(raw_text, str):
+         return "" # Should not happen, but good to be safe
+
+     answer = raw_text.strip()
+
+     # Attempt to extract content after "FINAL ANSWER:" if it's still present
+     # This regex is more robust to variations in spacing and casing
+     final_answer_match = re.search(r"FINAL ANSWER:\s*(.*)", answer, re.IGNORECASE | re.DOTALL)
+     if final_answer_match:
+         answer = final_answer_match.group(1).strip()
+
+     # Remove common conversational prefixes more aggressively
+     common_prefixes = [
+         "The answer is", "The final answer is", "So, the answer is", "Therefore, the answer is",
+         "Based on the information, the answer is", "The correct answer is", "My answer is",
+         "Okay, the answer is", "Sure, the answer is", "Here is the answer:", "The solution is",
+         "Answer:", "Result:"
+     ]
+     for prefix in common_prefixes:
+         if answer.lower().startswith(prefix.lower()):
+             answer = answer[len(prefix):].strip()
+             # Remove potential colon or period after prefix
+             if answer.startswith(":") or answer.startswith("."):
+                 answer = answer[1:].strip()
+             break # Stop after first prefix match
+
+     # Remove wrapping quotes (single or double)
+     if len(answer) >= 2:
+         if (answer.startswith('"') and answer.endswith('"')) or \
+            (answer.startswith("'") and answer.endswith("'")):
+             answer = answer[1:-1].strip()
+
+     # Specific GAIA formatting: remove units like $ or % unless specified otherwise by the question
+     # This is tricky to do generally, as some questions might require them.
+     # The prompt already tells Gemini about this. This is a fallback.
+     # For now, let's keep it simple and rely on the prompt.
+     # If a question asks for "USD with two decimal places", the LLM should include '$'.
+     # answer = answer.replace('$', '').replace('%', '').strip() # Re-evaluating if this is too aggressive
+
+     # Normalize spaces around commas for comma-separated lists
+     answer = re.sub(r'\s*,\s*', ',', answer)
+
+     # Remove trailing punctuation if it seems unintended (e.g. a lone period)
+     if len(answer) > 1 and answer.endswith(".") and not re.search(r"[a-zA-Z0-9]\.[a-zA-Z0-9]", answer): # Avoid stripping from e.g. "file.txt"
+         # Check if the part before the period is a number or a short phrase
+         # This is to avoid stripping periods from full sentences if the LLM disobeys "few words"
+         if not answer[:-1].strip().isdigit() and len(answer[:-1].strip().split()) > 3:
+             pass # Likely a sentence, keep period
+         else:
+             answer = answer[:-1].strip()
+
      return answer
+
+ def my_agent_logic(task_id: str, question: str, files_metadata: list = None):
+     print(f"Agent (Enhanced Tools + Gemini) processing Task ID: {task_id}, Question: {question}")
+     if files_metadata:
+         print(f"File metadata associated: {files_metadata}")

      gemini_api_key = os.environ.get("GEMINI_API_KEY")
      if not gemini_api_key:
          return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"

      system_prompt_lines = [
          "You are a general AI assistant. I will ask you a question.",
+         "Your primary goal is to provide the single, exact, concise, and factual answer to the question.",
+         "Do not include any conversational fluff, disclaimers, explanations, or any introductory phrases like 'The answer is:'. Your response should be ONLY the answer itself.",
+         "Do not use markdown formatting unless the question explicitly asks for it.",
+         "If the question implies a specific format (e.g., a number, a date, a comma-separated list), provide the answer in that format.",
+         "Do NOT include the phrase 'FINAL ANSWER:' in your response to me.",
+         "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise by the question.",
          "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
          "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
+         "If external files or tool outputs are provided below, use their content if relevant and accessible to answer the question.",
      ]

+     user_question_text_for_gemini = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"
      gemini_parts = []

+     # --- File & Tool Handling ---
+     tool_output_description = ""
      file_content_bytes, detected_mime_type, file_name = None, None, None
+
+     if files_metadata:
          file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)

+     if file_content_bytes:
+         if file_name and file_name.lower().endswith(".py") and detected_mime_type in ["text/x-python", "application/x-python-code", "text/plain"]:
+             print(f"Detected Python file: {file_name}")
+             try:
+                 python_code = file_content_bytes.decode('utf-8')
+                 execution_result, exec_error = execute_python_code(python_code)
+                 if exec_error:
+                     tool_output_description += f"\n\nExecution of Python file '{file_name}' failed: {exec_error}"
+                 elif execution_result:
+                     tool_output_description += f"\n\nOutput from executing Python file '{file_name}':\n{execution_result}"
+                 else:
+                     tool_output_description += f"\n\nPython file '{file_name}' executed without specific return or error."
+             except Exception as e_py_decode:
+                 tool_output_description += f"\n\nError decoding Python file '{file_name}': {e_py_decode}"
+
+         elif detected_mime_type and detected_mime_type.startswith("image/"):
+             try:
+                 base64_image = base64.b64encode(file_content_bytes).decode('utf-8')
+                 gemini_parts.append({"inline_data": {"mime_type": detected_mime_type, "data": base64_image}})
+                 tool_output_description += f"\n\nAn image file '{file_name}' ({detected_mime_type}) is provided. Refer to it if relevant."
+                 print(f"Added image {file_name} to Gemini parts for task {task_id}.")
+             except Exception as e_img:
+                 tool_output_description += f"\n\n[Agent note: Error processing image file '{file_name}': {e_img}]"
+
+         elif detected_mime_type and detected_mime_type.startswith("audio/"): # mp3, m4a, wav, etc.
+             try:
+                 base64_audio = base64.b64encode(file_content_bytes).decode('utf-8')
+                 gemini_parts.append({"inline_data": {"mime_type": detected_mime_type, "data": base64_audio}})
+                 tool_output_description += f"\n\nAn audio file '{file_name}' ({detected_mime_type}) is provided. Transcribe or analyze it if relevant to the question."
+                 print(f"Added audio {file_name} to Gemini parts for task {task_id}.")
+             except Exception as e_audio:
+                 tool_output_description += f"\n\n[Agent note: Error processing audio file '{file_name}': {e_audio}]"
+
+         elif detected_mime_type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel", "text/csv"]:
+             try:
+                 if "csv" in detected_mime_type: df = pd.read_csv(io.BytesIO(file_content_bytes))
+                 else: df = pd.read_excel(io.BytesIO(file_content_bytes))
+
+                 # Provide a more comprehensive preview
+                 preview_rows = min(10, len(df))
+                 preview_cols = min(5, len(df.columns))
+                 preview_df = df.iloc[:preview_rows, :preview_cols]
+                 df_description = f"First {preview_rows} rows and first {preview_cols} columns (if available):\n{preview_df.to_string(index=True)}\nTotal rows: {len(df)}, Total columns: {len(df.columns)}."
+                 if len(df.columns) > preview_cols:
+                     df_description += f"\nOther columns include: {list(df.columns[preview_cols:])}"
+
+                 tool_output_description += f"\n\nData from spreadsheet '{file_name}':\n{df_description}"
+                 print(f"Added spreadsheet preview for {file_name} to tool output description.")
+             except Exception as e_xls:
+                 tool_output_description += f"\n\n[Agent note: Unable to parse spreadsheet '{file_name}': {e_xls}]"
+
+         elif detected_mime_type == "text/plain":
+             try:
+                 text_content = file_content_bytes.decode('utf-8')
+                 tool_output_description += f"\n\nContent of attached text file '{file_name}':\n{text_content[:2000]}" # Limit length
+                 print(f"Added text file content '{file_name}' to tool output description.")
+             except Exception as e_txt:
+                 tool_output_description += f"\n\n[Agent note: A text file '{file_name}' was associated but could not be decoded: {e_txt}]"
+         else:
+             tool_output_description += f"\n\nNote: A file named '{file_name}' (type: {detected_mime_type or 'unknown'}) is associated. Its content could not be directly processed by current tools."
+     elif files_metadata: # File metadata exists but no bytes fetched (e.g. 404)
+         tool_output_description += f"\n\nNote: File(s) {files_metadata} were listed for this task, but could not be fetched or processed."
+
+     # Append the main question and any tool/file processing notes as a single text part if no multimodal data was added yet,
+     # or as the first text part if multimodal data (image/audio) is present.
+     final_user_text_for_gemini = user_question_text_for_gemini + tool_output_description
+     if not any(p.get("inline_data") for p in gemini_parts): # If no image/audio was added
+         gemini_parts.append({"text": final_user_text_for_gemini})
+     else: # If image/audio was added, insert text part at the beginning
+         gemini_parts.insert(0, {"text": final_user_text_for_gemini})

      payload = {
          "contents": [{"role": "user", "parts": gemini_parts}],
+         "generationConfig": {"temperature": 0.1, "maxOutputTokens": 350} # Very low temp for GAIA
      }
      api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
      agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"

      try:
          headers = {"Content-Type": "application/json"}
+         print(f"Calling Gemini API for task {task_id} with payload structure: {[(k, type(v)) for p in payload['contents'] for part in p['parts'] for k,v in part.items()]}")
+         response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=90) # Increased timeout slightly
          response.raise_for_status()
          result = response.json()

              result["candidates"][0].get("content") and
              result["candidates"][0]["content"].get("parts") and
              result["candidates"][0]["content"]["parts"][0].get("text")):
+             raw_answer_from_gemini = result["candidates"][0]["content"]["parts"][0]["text"].strip()
+             agent_computed_answer = clean_final_answer(raw_answer_from_gemini)
          else:
+             print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {json.dumps(result, indent=2)}")
              if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
                  block_reason = result["promptFeedback"]["blockReason"]
                  agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
              else:
                  agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
      except requests.exceptions.Timeout:
          agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
      except requests.exceptions.RequestException as e:
+         if e.response is not None: print(f"Gemini API Error Response Status: {e.response.status_code}, Body: {e.response.text}")
          agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
      except Exception as e:
          agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
+     print(f"Agent (Enhanced Tools + Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
      return agent_computed_answer

  def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):

          log_messages.append(f"Processing 1 random question based on user choice.")
      elif run_all_questions:
          log_messages.append(f"Processing all {len(tasks_to_process)} questions.")
+
+     # Need to import sys for execute_python_code's stdout capture
+     global sys
+     import sys
+
      for task in tasks_to_process:
          task_id = task.get("task_id")
          question = task.get("question")
+         associated_files_metadata = task.get("files", [])
          if task_id and question:
              log_messages.append(f"\nProcessing Task ID: {task_id}")
              log_messages.append(f"Question: {question}")
              if associated_files_metadata:
                  log_messages.append(f"Associated files metadata: {associated_files_metadata}")
              submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
              log_messages.append(f"Agent's Answer: {submitted_answer}")
              answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             if run_all_questions: # Add a small delay if processing all questions to be kind to APIs
+                 time.sleep(1) # 1-second delay between processing each question
          else:
              log_messages.append(f"Skipping malformed task: {task}")
      if not answers_to_submit:
 
          submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
      return "\n".join(submission_log_messages)

+ # --- Gradio Interface (largely unchanged) ---
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
      gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")

      with gr.Tabs():
          with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
              gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
+             gr.Markdown("This agent uses the Gemini API with enhanced tool handling (Python execution, audio, spreadsheets) to generate answers.")
              run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
              run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
              gr.Markdown("### Agent Run Log & Generated Answers:")