import json
from difflib import SequenceMatcher

from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.tools.tavily_search import TavilySearchResults

# Separator placed between individual documents in formatted search output.
_DOC_SEPARATOR = "\n\n---\n\n"

# Settings used by ``search_metadata``.
_METADATA_PATH = "metadata.jsonl"
_SIMILARITY_THRESHOLD = 0.6
_MAX_METADATA_RESULTS = 3


def _join_documents(texts) -> str:
    """Wrap each text in newlines and join them with the document separator."""
    return _DOC_SEPARATOR.join(f"\n{text}\n" for text in texts)


@tool
def add(a: float, b: float) -> float:
    """
    Adds two numbers.

    Args:
        a (float): the first number
        b (float): the second number
    """
    return a + b


@tool
def subtract(a: float, b: float) -> float:
    """
    Subtracts two numbers.

    Args:
        a (float): the first number
        b (float): the second number
    """
    return a - b


@tool
def multiply(a: float, b: float) -> float:
    """
    Multiplies two numbers.

    Args:
        a (float): the first number
        b (float): the second number
    """
    return a * b


@tool
def divide(a: float, b: float) -> float:
    """
    Divides two numbers.

    Args:
        a (float): the first float number
        b (float): the second float number

    Raises:
        ValueError: if b is zero.
    """
    if b == 0:
        raise ValueError("Cannot divided by zero.")
    return a / b


@tool
def modulus(a: int, b: int) -> int:
    """
    Get the modulus of two numbers.

    Args:
        a (int): the first number
        b (int): the second number

    Raises:
        ZeroDivisionError: if b is zero.
    """
    return a % b


@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key "wiki_results" holding the page contents
        of the loaded documents joined into one string.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = _join_documents(
        doc.page_content for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}


@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key "web_results" holding the result contents
        joined into one string (or the error text reported by the search
        tool when the search itself failed).
    """
    # ``invoke`` takes the tool input as its first positional argument;
    # passing ``query=`` as a keyword leaves that argument unfilled.
    search_results = TavilySearchResults(max_results=3).invoke({"query": query})

    # The Tavily tool reports failures as a plain string instead of a list.
    if isinstance(search_results, str):
        return {"web_results": search_results}

    texts = []
    for result in search_results:
        if isinstance(result, dict):
            # Tavily results are dicts carrying the text under "content".
            texts.append(result.get("content", ""))
        else:
            # Fall back gracefully for Document-like or unexpected items.
            texts.append(getattr(result, "page_content", str(result)))

    return {"web_results": _join_documents(texts)}


@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key "arvix_results" holding the first 1000
        characters of each loaded document joined into one string.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = _join_documents(
        doc.page_content[:1000] for doc in search_docs
    )
    return {"arvix_results": formatted_search_docs}


@tool
def search_metadata(query: str) -> str:
    """Search through metadata.jsonl file for matching questions and answers.

    Each line of the file is parsed as a JSON object. Records whose
    "Question" is more than 60% similar to the query (case-insensitive
    ``SequenceMatcher`` ratio) are collected, and the three most similar
    ones are returned.

    Args:
        query: The search query to match against questions in the metadata file.

    Returns:
        The matching records formatted as text, or a message saying that
        nothing matched.
    """
    needle = query.lower()
    results = []

    with open(_METADATA_PATH, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
                question = data["Question"]
                answer = data["Final answer"]
                steps = data["Annotator Metadata"]["Steps"]
            except (json.JSONDecodeError, KeyError, TypeError):
                # Skip lines that are not valid JSON or lack the expected
                # fields instead of aborting the whole search.
                continue
            if not isinstance(question, str):
                continue

            similarity = SequenceMatcher(None, needle, question.lower()).ratio()
            if similarity > _SIMILARITY_THRESHOLD:
                results.append(
                    {
                        "question": question,
                        "answer": answer,
                        "steps": steps,
                        "similarity": similarity,
                    }
                )

    if not results:
        return "No matching results found in metadata."

    # Most similar first; only the top few are reported.
    results.sort(key=lambda x: x["similarity"], reverse=True)
    return _DOC_SEPARATOR.join(
        f"Question: {r['question']}\nAnswer: {r['answer']}\nSteps: {r['steps']}"
        for r in results[:_MAX_METADATA_RESULTS]
    )