|
from langchain_core.tools import tool |
|
from langchain_community.document_loaders import WikipediaLoader |
|
from langchain_community.document_loaders import ArxivLoader |
|
from langchain_community.tools.tavily_search import TavilySearchResults |
|
from difflib import SequenceMatcher |
|
|
|
@tool
def add(a: float, b: float) -> float:
    """
    Adds two numbers.

    Args:

        a (float): the first number

        b (float): the second number

    """
    total = a + b
    return total
|
|
|
@tool
def subtract(a: float, b: float) -> float:
    """
    Subtracts two numbers.

    Args:

        a (float): the first number

        b (float): the second number

    """
    difference = a - b
    return difference
|
|
|
@tool
def multiply(a: float, b: float) -> float:
    """
    Multiplies two numbers.

    Args:

        a (float): the first number

        b (float): the second number

    """
    product = a * b
    return product
|
|
|
@tool
def divide(a: float, b: float) -> float:
    """
    Divides two numbers.

    Args:

        a (float): the first float number

        b (float): the second float number

    Raises:

        ValueError: if ``b`` is zero.

    """
    # Surface a clear, grammatical error to the calling agent instead of
    # an unhandled ZeroDivisionError. (Fixed typo: "Cannot divided".)
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
|
|
|
@tool
def modulus(a: int, b: int) -> int:
    """
    Get the modulus of two numbers.

    Args:

        a (int): the first number

        b (int): the second number

    """
    remainder = a % b
    return remainder
|
|
|
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        dict: ``{"wiki_results": <formatted string>}`` where all matching
        documents are joined into one string, each wrapped in a
        ``<Document ...>`` envelope so a downstream LLM can tell the
        sources apart.
    """
    # NOTE: return annotation corrected from `str` to `dict` — the
    # function has always returned a dict; the annotation was wrong.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return {"wiki_results": formatted_search_docs}
|
|
|
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        dict: ``{"web_results": <formatted string>}`` with each result
        wrapped in a ``<Document ...>`` envelope.
    """
    # NOTE: return annotation corrected from `str` to `dict` — the
    # function has always returned a dict.
    #
    # Bug fix: BaseTool.invoke takes the tool input positionally (the
    # parameter is named `input`), so `invoke(query=query)` raised a
    # TypeError. Pass the input dict positionally instead.
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})
    # Bug fix: Tavily returns a list of plain dicts (keys like "url" and
    # "content"), not Document objects, so the old `doc.metadata[...]` /
    # `doc.page_content` accesses raised AttributeError. Index the dicts
    # directly; .get() keeps a missing key from crashing the tool.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.get("url", "")}"/>\n{doc.get("content", "")}\n</Document>'
            for doc in search_docs
        ])
    return {"web_results": formatted_search_docs}
|
|
|
@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    NOTE(review): the tool keeps the original "arvix" spelling so any
    existing agent configuration that references it by name still works.

    Args:
        query: The search query.

    Returns:
        dict: ``{"arvix_results": <formatted string>}`` with each document
        truncated to its first 1000 characters to keep the prompt small.
    """
    # NOTE: return annotation corrected from `str` to `dict` — the
    # function has always returned a dict.
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # ArxivLoader metadata typically exposes keys like "Published" and
    # "Title" rather than "source", so the old `doc.metadata["source"]`
    # raised KeyError. Use .get() with a Title fallback — TODO confirm
    # against the installed langchain_community version.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata.get("source", doc.metadata.get("Title", ""))}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arvix_results": formatted_search_docs}
|
|
|
@tool
def search_metadata(query: str) -> str:
    """Search through metadata.jsonl file for matching questions and answers.

    Compares *query* against every stored question with a fuzzy
    SequenceMatcher ratio and returns up to the 3 closest matches
    (similarity > 0.6), formatted as question/answer/steps text.

    Args:
        query: The search query to match against questions in the metadata file.
    """
    import json

    results = []
    try:
        with open("metadata.jsonl", "r", encoding="utf-8") as f:
            for line in f:
                try:
                    data = json.loads(line)
                    similarity = SequenceMatcher(
                        None, query.lower(), data["Question"].lower()
                    ).ratio()
                    if similarity > 0.6:
                        results.append({
                            "question": data["Question"],
                            "answer": data["Final answer"],
                            "steps": data["Annotator Metadata"]["Steps"],
                            "similarity": similarity,
                        })
                # Robustness fix: skip records that are malformed JSON OR
                # are missing the expected keys ("Question", "Final answer",
                # "Annotator Metadata") — previously a missing key raised
                # KeyError and aborted the whole search.
                except (json.JSONDecodeError, KeyError):
                    continue
    # Robustness fix: a missing metadata file previously crashed the tool
    # with an unhandled FileNotFoundError.
    except FileNotFoundError:
        return "No matching results found in metadata."

    if not results:
        return "No matching results found in metadata."

    # Best matches first.
    results.sort(key=lambda x: x["similarity"], reverse=True)

    formatted_results = "\n\n---\n\n".join(
        [f"Question: {r['question']}\nAnswer: {r['answer']}\nSteps: {r['steps']}"
         for r in results[:3]]
    )
    return formatted_results