alex-i07 committed
Commit b4395cf · 1 Parent(s): 81917a3

without submitting, just to test

Files changed (6)
  1. agent.py +116 -0
  2. app.py +42 -14
  3. final_answer_validation_prompt.txt +7 -0
  4. requirements.txt +11 -1
  5. system_prompt.txt +2 -0
  6. tools.py +280 -0
agent.py ADDED
@@ -0,0 +1,116 @@
+import os
+from dotenv import load_dotenv
+from langgraph.prebuilt import ToolNode
+from typing import TypedDict, Annotated, Literal
+from langchain.chat_models import init_chat_model
+from langgraph.graph import add_messages, StateGraph, START, END
+from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
+
+from tools import (
+    default_file_reader,
+    image_reader,
+    excel_column_reader,
+    excel_find_column_values_sum,
+    wiki_search,
+    archive_search,
+    get_ioc_code,
+    check_commutativity,
+    audio_to_text,
+    video_to_text
+)
+
+load_dotenv()
+
+class AgentState(TypedDict):
+    messages: Annotated[list[AnyMessage], add_messages]
+
+
+def start_agent(question: str, question_filepath: str):
+    chat = init_chat_model("claude-3-5-sonnet-20241022", model_provider="anthropic", temperature=0)
+
+    tools = [wiki_search, archive_search, get_ioc_code, check_commutativity, video_to_text]
+
+    if question_filepath:
+        # If a file is provided, add the file-handling tools as well.
+        tools = tools + [default_file_reader, image_reader, excel_column_reader, excel_find_column_values_sum, audio_to_text]
+
+    chat_with_tools = chat.bind_tools(tools)
+
+    try:
+        with open("system_prompt.txt", 'r') as sp_file:
+            system_prompt = sp_file.read()
+    except FileNotFoundError:
+        print("Error: unable to open system_prompt.txt")
+        return None
+
+    if question_filepath:
+        messages = [
+            SystemMessage(system_prompt),
+            HumanMessage(content=f"{question} File located at: {question_filepath}")
+        ]
+    else:
+        messages = [
+            SystemMessage(system_prompt),
+            HumanMessage(content=f"{question}")
+        ]
+
+    def assistant(state: AgentState):
+        return {
+            **state,
+            "messages": [chat_with_tools.invoke(state["messages"])],
+        }
+
+    def validate_answer_format(state: AgentState):
+        try:
+            with open("final_answer_validation_prompt.txt", 'r') as favp_file:
+                final_answer_validation_prompt = favp_file.read()
+        except FileNotFoundError:
+            print("Error: unable to open final_answer_validation_prompt.txt")
+            return None
+
+        state["messages"].append(
+            HumanMessage(content=f"Verify your FINAL ANSWER again so it meets the user question requirements: {question}")
+        )
+
+        state["messages"].append(
+            HumanMessage(content=f"Verify your FINAL ANSWER again so it meets these requirements: {final_answer_validation_prompt}. "
+                                 f"Do not use any tool here, just validate the format of the final answer.")
+        )
+
+        return {
+            **state,
+            "messages": [chat_with_tools.invoke(state["messages"])],
+        }
+
+    def custom_tool_condition(state: AgentState, messages_key: str = "messages") -> Literal["tools", "validate"]:
+        # Route to the tool node while the last AI message still requests tool calls,
+        # otherwise move on to the final-answer validation step.
+        if isinstance(state, list):
+            ai_message = state[-1]
+        elif isinstance(state, dict) and (messages := state.get(messages_key, [])):
+            ai_message = messages[-1]
+        elif messages := getattr(state, messages_key, []):
+            ai_message = messages[-1]
+        else:
+            raise ValueError(f"No messages found in input state to tool_edge: {state}")
+        if hasattr(ai_message, "tool_calls") and len(ai_message.tool_calls) > 0:
+            return "tools"
+        return "validate"
+
+    initial_state = AgentState(
+        messages=messages,
+    )
+
+    builder = StateGraph(AgentState)
+
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    builder.add_node("validate", validate_answer_format)
+
+    builder.add_edge(START, "assistant")
+    builder.add_conditional_edges("assistant", custom_tool_condition)
+
+    builder.add_edge("tools", "assistant")
+    builder.add_edge("validate", END)
+    agent = builder.compile()
+    response = agent.invoke(initial_state)
+
+    return response['messages'][-1].content
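
For a quick local check of the graph above, start_agent can be called directly; the sketch below is an assumption-laden example (the question string is made up, and it presumes ANTHROPIC_API_KEY, plus OPENAI_API_KEY for the transcription tools, are available via the environment or a .env file):

# Local smoke test for agent.py; the question is a hypothetical example,
# and the API keys are assumed to be set in the environment or .env file.
from agent import start_agent

if __name__ == "__main__":
    answer = start_agent(
        question="What is the IOC country code for Japan?",  # made-up example question
        question_filepath="",  # empty string: no attached file, so file tools are not bound
    )
    print(answer)  # per system_prompt.txt, the reply should end with "FINAL ANSWER: ..."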
app.py CHANGED
@@ -1,23 +1,42 @@
 import os
-import gradio as gr
-import requests
 import inspect
+import tempfile
+import requests
+import gradio as gr
 import pandas as pd
+from agent import start_agent
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
+        print("BasicAgent initialized")
+    def __call__(self, question: str, question_filepath: str = "") -> str:
+        answer = start_agent(question, question_filepath)
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
+        print(f"Agent returning answer: {answer}")
+        return answer
+
+
+def download_question_file(task_id: str, filename: str) -> str | None:
+    try:
+        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", stream=True)
+        response.raise_for_status()
+
+        temp_dir = tempfile.gettempdir()
+        filepath = os.path.join(temp_dir, filename)
+
+        with open(filepath, 'wb') as file:
+            for chunk in response.iter_content(chunk_size=8192):
+                file.write(chunk)
+
+        return filepath
+    except requests.exceptions.RequestException as e:
+        print(f"Error downloading file: {e}")
+        return None
+
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -38,13 +57,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Instantiate Agent ( modify this part to create your agent)
+    # 1. Instantiate Agent (modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
+    # In the case of an app running as a hugging Face space, this link points toward your codebase (usefull for others, so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
 
@@ -75,14 +94,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
+        file_name = item.get("file_name")
         question_text = item.get("question")
+        question_filepath = ""
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+
+        if file_name:
+            question_filepath = download_question_file(task_id=task_id, filename=file_name)
+
         try:
-            submitted_answer = agent(question_text)
+
+            submitted_answer = agent(question_text, question_filepath)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -90,7 +117,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
+    """
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
@@ -138,6 +165,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     print(status_message)
     results_df = pd.DataFrame(results_log)
     return status_message, results_df
+    """
 
 
 # --- Build Gradio Interface using Blocks ---
@@ -154,7 +182,7 @@ with gr.Blocks() as demo:
     ---
    **Disclaimers:**
    Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-    This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+    This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
     """
     )
 
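To debug a single task without launching the Gradio UI, the same pieces can be wired together by hand; a rough sketch, assuming app.py only launches the demo behind a __main__ guard (as in the course template) and using placeholder task data rather than real values from the /questions endpoint:

# Sketch: run the agent on one task outside Gradio (placeholder task data).
from app import BasicAgent, download_question_file

agent = BasicAgent()
task_id = "..."   # placeholder; real ids come from the /questions endpoint
file_name = ""    # set to the task's file_name when it ships an attachment

question_filepath = ""
if file_name:
    # download_question_file returns a path in the temp dir, or None on failure
    question_filepath = download_question_file(task_id=task_id, filename=file_name) or ""

print(agent("What is the IOC country code for Japan?", question_filepath))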
final_answer_validation_prompt.txt ADDED
@@ -0,0 +1,7 @@
+Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma and space separated list of numbers and/or strings.
+If you are asked for a number, don't use commas to write your number, and don't use units such as $ or percent signs unless specified otherwise.
+If you are asked for a string, don't use articles or abbreviations (e.g. for cities),
+and write digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
+If the CITY name consists of two or more words, then output them all, but this applies ONLY TO CITIES.
+If you solve a math problem, output the answer that the tool returned to you.
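
Both prompt files pin the reply to the "FINAL ANSWER: ..." template, while agent.py returns (and app.py submits) the model's full message content. A hypothetical post-processing helper, not part of this commit, could strip the reply down to just the answer text:

import re

# Hypothetical helper: keep only the text after the "FINAL ANSWER:" marker
# mandated by the prompts; fall back to the raw reply if the marker is missing.
def extract_final_answer(reply: str) -> str:
    match = re.search(r"FINAL ANSWER:\s*(.*)", reply, flags=re.IGNORECASE | re.DOTALL)
    return match.group(1).strip() if match else reply.strip()

print(extract_final_answer("The tool returned 525.\nFINAL ANSWER: 525"))  # -> 525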
requirements.txt CHANGED
@@ -1,2 +1,12 @@
 gradio
-requests
+requests
+requests
+pandas
+pytubefix
+beautifulsoup4
+langchain
+langchain-core
+langchain_community
+langchain-openai
+langchain-anthropic
+langgraph
system_prompt.txt ADDED
@@ -0,0 +1,2 @@
+You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, and don't use units such as $ or percent signs unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
tools.py ADDED
@@ -0,0 +1,280 @@
+import os
+import base64
+import requests
+import tempfile
+import pandas as pd
+from openai import OpenAI
+from pytubefix import YouTube
+from langchain_community.tools import tool
+from bs4 import BeautifulSoup, ResultSet, PageElement, Tag, NavigableString
+
+@tool
+def default_file_reader(file_path: str) -> str | None:
+    """
+    Default file reader tool that opens a file as text, reads its content and returns it as a string.
+    Use this default tool if there is no specific file reader for a given file.
+    """
+
+    try:
+        with open(file_path, 'r') as file:
+            return file.read()
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        return None
+
+
+@tool
+def image_reader(file_path: str) -> dict[str, str | dict[str, str]] | None:
+    """
+    Opens a png image and returns its data as a dictionary in the image_url message format.
+    """
+
+    try:
+        with open(file_path, "rb") as image_file:
+            image_data = base64.b64encode(image_file.read()).decode('utf-8')
+
+        return {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_data}"}}
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        return None
+
+
+@tool
+def excel_column_reader(file_path: str) -> str | None:
+    """
+    Opens an Excel file, reads the first row to get the column names and returns them as a string.
+    Use it to find out what data is available in the Excel file.
+    """
+
+    try:
+        df = pd.read_excel(file_path)
+        return ' '.join(df.columns.astype(str))
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        return None
+
+
+@tool
+def excel_find_column_values_sum(file_path: str, columns: list[str]) -> None | int:
+    """Opens an Excel file, finds the specified columns by name and calculates the total sum of all numeric cells in those columns."""
+
+    try:
+        total = 0
+        df = pd.read_excel(file_path)
+
+        for column in columns:
+            total += df[column].sum()
+
+        return total
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        return None
+
+
+@tool
+def wiki_search(query: str) -> str | None:
+    """
+    Searches Wikipedia by query string and returns the content of the first page found.
+    Also use it to get information about shows and actors.
+    """
+
+    try:
+        ddg_results = []
+        wiki_results = ""
+        link_rows = _fetch_ddg_search_result_links(f"wikipedia {query}")
+        for link_row in link_rows:
+            if 'en.wikipedia.org' not in link_row.attrs['href']:
+                continue
+
+            ddg_results.append({
+                'title': link_row.get_text(strip=True),
+                'url': link_row.attrs['href']
+            })
+
+            wiki_results += _fetch_specific_page(link_row.attrs['href'])
+            if len(ddg_results) == 1:
+                break
+
+        return wiki_results
+    except requests.exceptions.RequestException as e:
+        print(f"Error during request: {e}")
+        return None
+    except Exception as e:
+        print(f"Error parsing results: {e}")
+        return None
+
+
+@tool
+def archive_search(query: str) -> str | None:
+    """
+    Searches archive.org by query string and returns the content of the first page found.
+    Use this search when you need to find a scientific paper or a specific publication detail.
+    """
+
+    try:
+        ddg_results = []
+        archive_results = ""
+        link_rows = _fetch_ddg_search_result_links(f"archive.org {query}")
+        for link_row in link_rows:
+            if 'archive.org' not in link_row.attrs['href']:
+                continue
+
+            ddg_results.append({
+                'title': link_row.get_text(strip=True),
+                'url': link_row.attrs['href']
+            })
+
+            archive_results += _fetch_specific_page(link_row.attrs['href'])
+
+            if len(ddg_results) == 1:
+                break
+
+        return archive_results
+    except requests.exceptions.RequestException as e:
+        print(f"Error during request: {e}")
+        return None
+    except Exception as e:
+        print(f"Error parsing results: {e}")
+        return None
+
+
+@tool
+def get_ioc_code(country_name: str) -> str | None:
+    """
+    Accepts a country name as a string and returns the IOC code of this country.
+    """
+
+    try:
+        ioc_df = pd.read_html('https://en.wikipedia.org/wiki/List_of_IOC_country_codes')[0]
+        ioc_df['Code'] = ioc_df['Code'].str[-3:]
+        name_to_code = dict(zip(ioc_df['National Olympic Committee'], ioc_df['Code']))
+
+        return name_to_code.get(country_name)
+    except Exception as e:
+        print(f"Error: {e}")
+        return None
+
+@tool
+def check_commutativity(table_definition: str) -> str | None:
+    """
+    Use this tool if you need to verify whether a binary operation defined by a table is commutative.
+    Returns a comma separated string of the elements involved in counter-examples violating x∗y=y∗x
+    (which prove that * is not commutative), or None if the operation is commutative.
+    Example of table definition:
+    |*|a|b|c|d|e|
+    |---|---|---|---|---|---|
+    |a|a|b|c|b|d|
+    |b|b|c|a|e|c|
+    |c|c|a|b|b|a|
+    |d|b|e|b|e|d|
+    |e|d|b|a|d|c|
+    """
+    lines = [line.strip() for line in table_definition.strip().splitlines() if
+             line.strip().startswith('|') and not line.strip().startswith('|-')]
+
+    # Parse header: skip the '*' cell
+    header_cells = [cell.strip() for cell in lines[0].split('|')[1:] if cell.strip()]
+    S = header_cells[1:]  # Skip the first header cell which is "*"
+
+    operation_table = {}
+    for row in lines[1:]:
+        cells = [cell.strip() for cell in row.split('|')[1:] if cell.strip()]
+        row_label = cells[0]
+        values = cells[1:]
+        if len(values) != len(S):
+            raise ValueError(f"Row {row_label} does not have the correct number of entries.")
+        operation_table[row_label] = dict(zip(S, values))
+
+    counter_example_elements = set()
+
+    for x in S:
+        for y in S:
+            if operation_table[x][y] != operation_table[y][x]:
+                counter_example_elements.update([x, y])
+
+    return ', '.join(sorted(counter_example_elements)) if len(counter_example_elements) > 0 else None
+
+@tool
+def audio_to_text(file_path: str) -> str | None:
+    """
+    Transcribes an audio file to text and returns the text as a string.
+    """
+    try:
+        client = OpenAI()
+
+        with open(file_path, "rb") as audio_file:
+            transcription = client.audio.transcriptions.create(
+                model="gpt-4o-transcribe",
+                file=audio_file
+            )
+
+        return transcription.text
+    except Exception as e:
+        print(f"Error: {e}")
+        return None
+
+@tool
+def video_to_text(video_url: str) -> str | None:
+    """
+    Downloads a YouTube video by url, transcribes it to text and returns the text as a string.
+    """
+    file_path = ""
+    try:
+        ytx = YouTube(video_url)
+        temp_dir = tempfile.gettempdir()
+        ysx = ytx.streams.get_highest_resolution()
+        file_path = ysx.download(output_path=temp_dir)
+
+        client = OpenAI()
+
+        with open(file_path, "rb") as video_file:
+            transcription = client.audio.transcriptions.create(
+                model="gpt-4o-transcribe",
+                file=video_file,
+                temperature=0.0,
+                prompt="Ignore music playing in the background and transcribe all conversations."
+            )
+
+        return transcription.text
+    except FileNotFoundError:
+        print(f"Error: File {file_path} was not found.")
+        return None
+    except Exception as e:
+        print(f"Error: {e}")
+        return None
+
+
+def _fetch_ddg_search_result_links(query: str) -> ResultSet[PageElement | Tag | NavigableString]:
+    # Query the lightweight DuckDuckGo endpoint and return the result link anchors.
+    url = "https://lite.duckduckgo.com/lite/"
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.5',
+    }
+
+    params = {
+        'q': query,
+        'kl': 'us-en'
+    }
+
+    ddg_response = requests.get(url, headers=headers, params=params)
+    ddg_response.raise_for_status()
+
+    soup = BeautifulSoup(ddg_response.text, 'html.parser')
+    return soup.find_all('a', {'class': 'result-link'})
+
+
+def _fetch_specific_page(url: str) -> str:
+    # Fetch a page and return its visible text content.
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.5',
+    }
+
+    wiki_response = requests.get(url, headers=headers)
+    wiki_response.raise_for_status()
+
+    soup = BeautifulSoup(wiki_response.text, 'html.parser')
+    return soup.get_text()
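
Since these functions are wrapped with LangChain's @tool decorator, each one can also be invoked on its own with .invoke and a dict of arguments, which is handy for spot checks; for the example table from the check_commutativity docstring this prints "b, e", the only pair that breaks commutativity:

# Standalone check of a single tool: @tool-wrapped functions are LangChain
# tools and are called via .invoke({...}).
from tools import check_commutativity

table = """
|*|a|b|c|d|e|
|---|---|---|---|---|---|
|a|a|b|c|b|d|
|b|b|c|a|e|c|
|c|c|a|b|b|a|
|d|b|e|b|e|d|
|e|d|b|a|d|c|
"""

print(check_commutativity.invoke({"table_definition": table}))  # -> "b, e"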