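"""Run a LangChain-based agent over questions fetched from the agents-course-unit4 scoring API.

Fetches the question list from the scoring endpoint, asks the selected agent
("openai" or "deepseek") each question using an Alfred-the-butler system prompt,
and prints the collected answers. Nothing is submitted to the server.
"""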
import requests
import sys
from pathlib import Path

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables import RunnableConfig
from langfuse.callback import CallbackHandler


def run(provider: str, questions_data: list, question_number: int):
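    """Build the agent for `provider` ("openai" or "deepseek") and run it on the
    first `question_number` questions.

    Each answer is printed and collected in `answers_payload` / `results_log`;
    the function itself returns None.
    """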
    try:
        if provider == "openai":
            from agent_OAI_03 import create_and_compile_oai_agent
            agent = create_and_compile_oai_agent()
        elif provider == "deepseek":
            from agent_DS_01 import create_and_compile_ds_agent
            agent = create_and_compile_ds_agent()
        else:
            # Guard against typos: without this, an unknown provider would leave
            # `agent` undefined and fail later with a NameError.
            raise ValueError(f"Unsupported provider: {provider}")

        print('agent compiled')
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return None

    results_log = []
    answers_payload = []

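    # System prompt shared by every question: describes the available tools and
    # the required answer format.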
    prompt = """
As Alfred the butler:
1.- analyze the human user's question
2.- design a plan for reaching the solution, including the use of the provided tools (described below)
3.- and, based on your reasoning and the information gathered using the available tools,
provide exactly the answer in the requested format
(there is always a specific format for the direct response: a number, a word, etc.)

You are equipped with powerful tools:
- Based on the user's words you can look for related pages on the Internet with TavilySearch: it will
give you a list of URLs together with a sneak peek of each page's content. Since the sneak peeks rarely contain the
final answer, they mainly help you choose which URL to visit next with the next tool:
- RequestsGetTool, for capturing the page text along with its DOM, so you can complete your knowledge and make a new decision
- Describe a Photo, for describing the content of a picture for further questions. The tool just needs the file name;
it will take care of loading it from the file system and sending it to the VLM.
- Transcript an Audio, for transcribing the content of an audio file for further questions. The tool just needs
the file name; it will take care of loading it from the file system and sending it to the transcriber.
- Read an excel file, for attaching the content as a text string to the messages. The tool just needs the file name; it will
take care of loading it from the file system and sending it to the agent.

**Very important**: answer only with the specific information in the specified format.
It will be parsed by a deterministic program that evaluates your skills.
For example
- User: "How many items .....?"
- Your answer: "5"
- User: "comma separated list of ....."
- Your answer: "item1, item2, item3" (put a blank space after each comma)
Another format rule for your answer: the first letter of the final answer should be upper case unless the word appears in the question.
"""

    # `match` is a placeholder: no ground-truth answers are available in this script,
    # so it is never incremented and the count printed at the end is always 0.
    match = 0

    for item in questions_data[:question_number]:
        print(item)
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            question_content = question_text
            file_name = item.get('file_name')
            if file_name and file_name.endswith(('.png', '.mp3', '.xlsx', '.py')):
                # Pass the file name alongside the question so the agent can hand it to the right tool.
                question_content = [
                    {"type": "text", "text": question_text},
                    {"type": "text", "text": f"the file name is: {file_name}"}
                ]
            messages = [SystemMessage(content=prompt), HumanMessage(content=question_content)]
            print('invoking agent')

            # Langfuse callback handler to trace this agent run.
            langfuse_handler = CallbackHandler()

            submitted_answer = agent.invoke(
                input={"messages": messages},
                config=RunnableConfig(callbacks=[langfuse_handler])
            )

            final_answer = submitted_answer['messages'][-1].content
            print('response')
            print(final_answer)

            answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
            results_log.append({"Task ID": task_id, "Question": question_text,
                                "Submitted Answer": final_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    print('**** OUTPUT ***')
    print(answers_payload)
    print('\n\n ===== ---- TOTAL MATCHES:', match)
    return None

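
# `submit_url` is built in the main block below but never used. The sketch here shows how the
# collected payload could be posted; it is an assumption, not part of the original flow, and the
# "username" / "agent_code" fields are guesses at the scoring API's expected body, so check the
# actual contract before relying on it.
def submit_answers(submit_url: str, username: str, agent_code: str, answers_payload: list) -> dict:
    """POST the collected answers to the scoring endpoint (hypothetical helper, not called here)."""
    response = requests.post(
        submit_url,
        json={"username": username, "agent_code": agent_code, "answers": answers_payload},
        timeout=60,
    )
    response.raise_for_status()
    return response.json()
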

if __name__ == "__main__":

    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"  # defined but never used in this script

    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            sys.exit("Fetched questions list is empty or invalid format.")
        print(f"Fetched {len(questions_data)} questions.")
    # requests' JSONDecodeError is a subclass of RequestException (requests >= 2.27),
    # so it has to be caught before the more general handler or this branch would never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        sys.exit(f"Error decoding server response for questions: {e}")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        sys.exit(f"Error fetching questions: {e}")
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        sys.exit(f"An unexpected error occurred fetching questions: {e}")

    run("openai", questions_data, question_number=20)