aleixlopezpascual committed on
Commit
ae374df
·
1 Parent(s): 8232946
Files changed (2) hide show
  1. agent.py +119 -28
  2. test.ipynb +0 -0
agent.py CHANGED
@@ -18,6 +18,7 @@ from supabase.client import Client, create_client
18
 
19
  load_dotenv()
20
 
 
21
  @tool
22
  def multiply(a: int, b: int) -> int:
23
  """Multiply two numbers.
@@ -28,30 +29,33 @@ def multiply(a: int, b: int) -> int:
28
  """
29
  return a * b
30
 
 
31
  @tool
32
  def add(a: int, b: int) -> int:
33
  """Add two numbers.
34
-
35
  Args:
36
  a: first int
37
  b: second int
38
  """
39
  return a + b
40
 
 
41
  @tool
42
  def subtract(a: int, b: int) -> int:
43
  """Subtract two numbers.
44
-
45
  Args:
46
  a: first int
47
  b: second int
48
  """
49
  return a - b
50
 
 
51
  @tool
52
- def divide(a: int, b: int) -> int:
53
  """Divide two numbers.
54
-
55
  Args:
56
  a: first int
57
  b: second int
@@ -60,20 +64,22 @@ def divide(a: int, b: int) -> int:
60
  raise ValueError("Cannot divide by zero.")
61
  return a / b
62
 
 
63
  @tool
64
  def modulus(a: int, b: int) -> int:
65
  """Get the modulus of two numbers.
66
-
67
  Args:
68
  a: first int
69
  b: second int
70
  """
71
  return a % b
72
 
 
73
  @tool
74
- def wiki_search(query: str) -> str:
75
  """Search Wikipedia for a query and return maximum 2 results.
76
-
77
  Args:
78
  query: The search query."""
79
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
@@ -81,37 +87,123 @@ def wiki_search(query: str) -> str:
81
  [
82
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
83
  for doc in search_docs
84
- ])
 
85
  return {"wiki_results": formatted_search_docs}
86
 
 
87
  @tool
88
- def web_search(query: str) -> str:
89
- """Search Tavily for a query and return maximum 3 results.
90
-
 
91
  Args:
92
- query: The search query."""
93
- search_docs = TavilySearchResults(max_results=3).invoke(query=query)
94
- formatted_search_docs = "\n\n---\n\n".join(
95
- [
96
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
97
- for doc in search_docs
98
- ])
99
- return {"web_results": formatted_search_docs}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  @tool
102
- def arvix_search(query: str) -> str:
103
  """Search Arxiv for a query and return maximum 3 result.
104
-
105
  Args:
106
  query: The search query."""
107
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  formatted_search_docs = "\n\n---\n\n".join(
109
  [
110
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
111
- for doc in search_docs
112
- ])
113
- return {"arvix_results": formatted_search_docs}
114
-
115
 
116
 
117
  # load the system prompt from the file
@@ -134,12 +226,10 @@ vector_store = SupabaseVectorStore(
134
  )
135
  create_retriever_tool = create_retriever_tool(
136
  retriever=vector_store.as_retriever(),
137
- name="Question Search",
138
  description="A tool to retrieve similar questions from a vector store.",
139
  )
140
 
141
-
142
-
143
  tools = [
144
  multiply,
145
  add,
@@ -149,6 +239,7 @@ tools = [
149
  wiki_search,
150
  web_search,
151
  arvix_search,
 
152
  ]
153
 
154
  # Build graph function
 
18
 
19
  load_dotenv()
20
 
21
+
22
  @tool
23
  def multiply(a: int, b: int) -> int:
24
  """Multiply two numbers.
 
29
  """
30
  return a * b
31
 
32
+
33
  @tool
34
  def add(a: int, b: int) -> int:
35
  """Add two numbers.
36
+
37
  Args:
38
  a: first int
39
  b: second int
40
  """
41
  return a + b
42
 
43
+
44
  @tool
45
  def subtract(a: int, b: int) -> int:
46
  """Subtract two numbers.
47
+
48
  Args:
49
  a: first int
50
  b: second int
51
  """
52
  return a - b
53
 
54
+
55
  @tool
56
+ def divide(a: int, b: int) -> float:
57
  """Divide two numbers.
58
+
59
  Args:
60
  a: first int
61
  b: second int
 
64
  raise ValueError("Cannot divide by zero.")
65
  return a / b
66
 
67
+
68
  @tool
69
  def modulus(a: int, b: int) -> int:
70
  """Get the modulus of two numbers.
71
+
72
  Args:
73
  a: first int
74
  b: second int
75
  """
76
  return a % b
77
 
78
+
79
  @tool
80
+ def wiki_search(query: str) -> dict:
81
  """Search Wikipedia for a query and return maximum 2 results.
82
+
83
  Args:
84
  query: The search query."""
85
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
 
87
  [
88
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
89
  for doc in search_docs
90
+ ]
91
+ )
92
  return {"wiki_results": formatted_search_docs}
93
 
94
+
95
  @tool
96
+ def web_search(query: str) -> dict:
97
+ """Search Tavily for a query and return maximum 3 results,
98
+ formatted with source URL, title, and content.
99
+
100
  Args:
101
+ query: The search query.
102
+ """
103
+
104
+ tavily_tool = TavilySearchResults(max_results=3)
105
+
106
+ # 'search_docs' is expected to be a list of dictionaries based on your sample.
107
+ # Each dictionary contains keys like 'url', 'content', 'title'.
108
+ search_docs = tavily_tool.invoke(query)
109
+
110
+ final_formatted_docs = []
111
+
112
+ if isinstance(search_docs, list):
113
+ for doc_dict in search_docs: # Iterate through the list of result dictionaries
114
+ if isinstance(doc_dict, dict):
115
+ # Extract data using dictionary keys found in your sample:
116
+ source_url = doc_dict.get(
117
+ "url",
118
+ "N/A"
119
+ ) # From your sample, e.g., 'https://www.biblegateway.com/...'
120
+ page_content = doc_dict.get(
121
+ "content",
122
+ ""
123
+ ) # From your sample, e.g., '8\xa0When the king’s order...'
124
+ title = doc_dict.get(
125
+ "title",
126
+ "No Title Provided"
127
+ ) # From your sample, e.g., 'Esther 1-10 NIV...'
128
+
129
+ # Format the output string including source, title, and content
130
+ final_formatted_docs.append(
131
+ f'<Document source="{source_url}" title="{title}"/>\n{page_content}\n</Document>'
132
+ )
133
+ else:
134
+ # This handles cases where an item in the list returned by Tavily might not be a dictionary.
135
+ print(
136
+ f"[web_search_DEBUG] Expected a dictionary in search_docs list, but got {type(doc_dict)}: {str(doc_dict)[:100]}"
137
+ )
138
+ elif isinstance(search_docs, str):
139
+ # This handles cases where the Tavily tool might return a single string (e.g., an error message)
140
+ print(
141
+ f"[web_search_DEBUG] Tavily search returned a string, possibly an error: {search_docs}"
142
+ )
143
+ final_formatted_docs.append(
144
+ f'<Document source="Error" title="Error"/>\n{search_docs}\n</Document>'
145
+ )
146
+ else:
147
+ # This handles any other unexpected types for search_docs
148
+ print(
149
+ f"[web_search_DEBUG] Expected search_docs to be a list or string, but got {type(search_docs)}. Output may be empty."
150
+ )
151
+
152
+ joined_formatted_docs = "\n\n---\n\n".join(final_formatted_docs)
153
+
154
+ return {"web_results": joined_formatted_docs}
155
+
156
 
157
  @tool
158
+ def arvix_search(query: str) -> dict:
159
  """Search Arxiv for a query and return maximum 3 result.
160
+
161
  Args:
162
  query: The search query."""
163
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
164
+
165
+ # print(f"[arvix_search_DEBUG] ArxivLoader found {len(search_docs)} documents.")
166
+
167
+ processed_docs_str_list = []
168
+ for i, doc in enumerate(search_docs):
169
+ # print(f"\n--- [arvix_search_DEBUG] Document {i+1} ---")
170
+ # print(f"Metadata: {doc.metadata}")
171
+ # print(f"Page Content (first 200 chars): {doc.page_content[:200]}...")
172
+ # print(f"--- End Debug for Document {i+1} ---\n")
173
+
174
+ # Your original logic to format the document (with the fix for 'source')
175
+ title = doc.metadata.get("Title", "N/A")
176
+ published = doc.metadata.get(
177
+ "Published",
178
+ "N/A"
179
+ ) # 'page' might often be empty for ArxivLoader results
180
+ # content_snippet = doc.page_content[:3000]
181
+ content_snippet = doc.page_content
182
+
183
+ formatted_doc_str = f'<Document title="{title}" published="{published}"/>\n{content_snippet}\n</Document>'
184
+ processed_docs_str_list.append(formatted_doc_str)
185
+
186
+ formatted_search_results = "\n\n---\n\n".join(processed_docs_str_list)
187
+
188
+ # print(f"[arvix_search_DEBUG] Returning: {{\"arvix_results\": \"{formatted_search_results[:100]}...\"}}")
189
+
190
+ return {"arvix_results": formatted_search_results}
191
+
192
+
193
+ @tool
194
+ def similar_question_search(question: str) -> dict:
195
+ """Search the vector database for similar questions and return the first results.
196
+
197
+ Args:
198
+ question: the question human provided."""
199
+ matched_docs = vector_store.similarity_search(question, 3)
200
  formatted_search_docs = "\n\n---\n\n".join(
201
  [
202
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
203
+ for doc in matched_docs
204
+ ]
205
+ )
206
+ return {"similar_questions": formatted_search_docs}
207
 
208
 
209
  # load the system prompt from the file
 
226
  )
227
  create_retriever_tool = create_retriever_tool(
228
  retriever=vector_store.as_retriever(),
229
+ name="question_retriever",
230
  description="A tool to retrieve similar questions from a vector store.",
231
  )
232
 
 
 
233
  tools = [
234
  multiply,
235
  add,
 
239
  wiki_search,
240
  web_search,
241
  arvix_search,
242
+ similar_question_search,
243
  ]
244
 
245
  # Build graph function
test.ipynb CHANGED
The diff for this file is too large to render. See raw diff