Spaces:

aleixlopezpascual
/

final_assignment_v3

Sleeping

App Files Community

aleixlopezpascual committed on May 12

Commit

ae374df

1 Parent(s): 8232946

fix tools

Browse files

Files changed (2) hide show

agent.py +119 -28
test.ipynb +0 -0

agent.py CHANGED Viewed

@@ -18,6 +18,7 @@ from supabase.client import Client, create_client
 load_dotenv()
 @tool
 def multiply(a: int, b: int) -> int:
     """Multiply two numbers.
@@ -28,30 +29,33 @@ def multiply(a: int, b: int) -> int:
     """
     return a * b
 @tool
 def add(a: int, b: int) -> int:
     """Add two numbers.
     Args:
         a: first int
         b: second int
     """
     return a + b
 @tool
 def subtract(a: int, b: int) -> int:
     """Subtract two numbers.
     Args:
         a: first int
         b: second int
     """
     return a - b
 @tool
-def divide(a: int, b: int) -> int:
     """Divide two numbers.
     Args:
         a: first int
         b: second int
@@ -60,20 +64,22 @@ def divide(a: int, b: int) -> int:
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
 def modulus(a: int, b: int) -> int:
     """Get the modulus of two numbers.
     Args:
         a: first int
         b: second int
     """
     return a % b
 @tool
-def wiki_search(query: str) -> str:
     """Search Wikipedia for a query and return maximum 2 results.
     Args:
         query: The search query."""
     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
@@ -81,37 +87,123 @@ def wiki_search(query: str) -> str:
         [
             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
             for doc in search_docs
-        ])
     return {"wiki_results": formatted_search_docs}
 @tool
-def web_search(query: str) -> str:
-    """Search Tavily for a query and return maximum 3 results.
     Args:
-        query: The search query."""
-    search_docs = TavilySearchResults(max_results=3).invoke(query=query)
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
-        ])
-    return {"web_results": formatted_search_docs}
 @tool
-def arvix_search(query: str) -> str:
     """Search Arxiv for a query and return maximum 3 result.
     Args:
         query: The search query."""
     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
     formatted_search_docs = "\n\n---\n\n".join(
         [
             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
-            for doc in search_docs
-        ])
-    return {"arvix_results": formatted_search_docs}
 # load the system prompt from the file
@@ -134,12 +226,10 @@ vector_store = SupabaseVectorStore(
 )
 create_retriever_tool = create_retriever_tool(
     retriever=vector_store.as_retriever(),
-    name="Question Search",
     description="A tool to retrieve similar questions from a vector store.",
 )
 tools = [
     multiply,
     add,
@@ -149,6 +239,7 @@ tools = [
     wiki_search,
     web_search,
     arvix_search,
 ]
 # Build graph function

 load_dotenv()
 @tool
 def multiply(a: int, b: int) -> int:
     """Multiply two numbers.
     """
     return a * b
 @tool
 def add(a: int, b: int) -> int:
     """Add two numbers.
     Args:
         a: first int
         b: second int
     """
     return a + b
 @tool
 def subtract(a: int, b: int) -> int:
     """Subtract two numbers.
     Args:
         a: first int
         b: second int
     """
     return a - b
 @tool
+def divide(a: int, b: int) -> float:
     """Divide two numbers.
     Args:
         a: first int
         b: second int
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
 def modulus(a: int, b: int) -> int:
     """Get the modulus of two numbers.
     Args:
         a: first int
         b: second int
     """
     return a % b
 @tool
+def wiki_search(query: str) -> dict:
     """Search Wikipedia for a query and return maximum 2 results.
     Args:
         query: The search query."""
     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
         [
             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
             for doc in search_docs
+        ]
+    )
     return {"wiki_results": formatted_search_docs}
 @tool
+def web_search(query: str) -> dict:
+    """Search Tavily for a query and return maximum 3 results,
+    formatted with source URL, title, and content.
     Args:
+        query: The search query.
+    """
+    tavily_tool = TavilySearchResults(max_results=3)
+    # 'search_docs' is expected to be a list of dictionaries based on your sample.
+    # Each dictionary contains keys like 'url', 'content', 'title'.
+    search_docs = tavily_tool.invoke(query)
+    final_formatted_docs = []
+    if isinstance(search_docs, list):
+        for doc_dict in search_docs:  # Iterate through the list of result dictionaries
+            if isinstance(doc_dict, dict):
+                # Extract data using dictionary keys found in your sample:
+                source_url = doc_dict.get(
+                    "url",
+                    "N/A"
+                    )  # From your sample, e.g., 'https://www.biblegateway.com/...'
+                page_content = doc_dict.get(
+                    "content",
+                    ""
+                    )  # From your sample, e.g., '8\xa0When the king’s order...'
+                title = doc_dict.get(
+                    "title",
+                    "No Title Provided"
+                    )  # From your sample, e.g., 'Esther 1-10 NIV...'
+                # Format the output string including source, title, and content
+                final_formatted_docs.append(
+                    f'<Document source="{source_url}" title="{title}"/>\n{page_content}\n</Document>'
+                )
+            else:
+                # This handles cases where an item in the list returned by Tavily might not be a dictionary.
+                print(
+                    f"[web_search_DEBUG] Expected a dictionary in search_docs list, but got {type(doc_dict)}: {str(doc_dict)[:100]}"
+                    )
+    elif isinstance(search_docs, str):
+        # This handles cases where the Tavily tool might return a single string (e.g., an error message)
+        print(
+            f"[web_search_DEBUG] Tavily search returned a string, possibly an error: {search_docs}"
+            )
+        final_formatted_docs.append(
+            f'<Document source="Error" title="Error"/>\n{search_docs}\n</Document>'
+        )
+    else:
+        # This handles any other unexpected types for search_docs
+        print(
+            f"[web_search_DEBUG] Expected search_docs to be a list or string, but got {type(search_docs)}. Output may be empty."
+            )
+    joined_formatted_docs = "\n\n---\n\n".join(final_formatted_docs)
+    return {"web_results": joined_formatted_docs}
 @tool
+def arvix_search(query: str) -> dict:
     """Search Arxiv for a query and return maximum 3 result.
     Args:
         query: The search query."""
     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+    # print(f"[arvix_search_DEBUG] ArxivLoader found {len(search_docs)} documents.")
+    processed_docs_str_list = []
+    for i, doc in enumerate(search_docs):
+        # print(f"\n--- [arvix_search_DEBUG] Document {i+1} ---")
+        # print(f"Metadata: {doc.metadata}")
+        # print(f"Page Content (first 200 chars): {doc.page_content[:200]}...")
+        # print(f"--- End Debug for Document {i+1} ---\n")
+        # Your original logic to format the document (with the fix for 'source')
+        title = doc.metadata.get("Title", "N/A")
+        published = doc.metadata.get(
+            "Published",
+            "N/A"
+            )  # 'page' might often be empty for ArxivLoader results
+        # content_snippet = doc.page_content[:3000]
+        content_snippet = doc.page_content
+        formatted_doc_str = f'<Document title="{title}" published="{published}"/>\n{content_snippet}\n</Document>'
+        processed_docs_str_list.append(formatted_doc_str)
+    formatted_search_results = "\n\n---\n\n".join(processed_docs_str_list)
+    # print(f"[arvix_search_DEBUG] Returning: {{\"arvix_results\": \"{formatted_search_results[:100]}...\"}}")
+    return {"arvix_results": formatted_search_results}
+@tool
+def similar_question_search(question: str) -> dict:
+    """Search the vector database for similar questions and return the first results.
+    Args:
+        question: the question human provided."""
+    matched_docs = vector_store.similarity_search(question, 3)
     formatted_search_docs = "\n\n---\n\n".join(
         [
             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+            for doc in matched_docs
+        ]
+    )
+    return {"similar_questions": formatted_search_docs}
 # load the system prompt from the file
 )
 create_retriever_tool = create_retriever_tool(
     retriever=vector_store.as_retriever(),
+    name="question_retriever",
     description="A tool to retrieve similar questions from a vector store.",
 )
 tools = [
     multiply,
     add,
     wiki_search,
     web_search,
     arvix_search,
+    similar_question_search,
 ]
 # Build graph function

test.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff