from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from difflib import SequenceMatcher
@tool
def add(a: float, b: float) -> float:
    """
    Adds two numbers and returns their sum.

    Args:
        a (float): the first number
        b (float): the second number

    Returns:
        float: the value of ``a + b``
    """
    total = a + b
    return total
@tool
def subtract(a: float, b: float) -> float:
    """
    Subtracts the second number from the first.

    Args:
        a (float): the number to subtract from
        b (float): the number to subtract

    Returns:
        float: the value of ``a - b``
    """
    difference = a - b
    return difference
@tool
def multiply(a: float, b: float) -> float:
    """
    Multiplies two numbers and returns their product.

    Args:
        a (float): the first factor
        b (float): the second factor

    Returns:
        float: the value of ``a * b``
    """
    product = a * b
    return product
@tool
def divide(a: float, b: float) -> float:
    """
    Divides the first number by the second.

    Args:
        a (float): the dividend
        b (float): the divisor; must not be zero

    Returns:
        float: the quotient ``a / b``

    Raises:
        ValueError: if ``b`` is zero
    """
    # Guard explicitly so the caller gets a ValueError with a readable
    # message instead of Python's ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
@tool
def modulus(a: int, b: int) -> int:
    """
    Gets the remainder of dividing the first number by the second.

    Args:
        a (int): the dividend
        b (int): the divisor

    Returns:
        int: the value of ``a % b``
    """
    remainder = a % b
    return remainder
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "wiki_results". Its value is one string
        holding the page content of every loaded document, with a "---"
        separator line between consecutive documents.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Each document body is padded with a newline on both sides so the
    # "---" separators stay visually detached from the surrounding text.
    formatted_search_docs = "\n\n---\n\n".join(
        f"\n{doc.page_content}\n" for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "web_results". Its value is one string
        holding the content of every search hit, with a "---" separator
        line between consecutive hits.
    """
    # The tool input must be passed positionally: ``invoke`` has a required
    # first parameter, so ``invoke(query=query)`` fails with a TypeError.
    search_results = TavilySearchResults(max_results=3).invoke(query)

    # NOTE(review): the Tavily tool appears to hand back a plain string
    # (an error description) instead of a list when the request fails -
    # confirm against the installed langchain_community version.
    if isinstance(search_results, str):
        return {"web_results": search_results}

    def _content(hit) -> str:
        # Tavily hits are expected to be dicts carrying a "content" field;
        # Document-like objects are still accepted as a fallback.
        if isinstance(hit, dict):
            return str(hit.get("content", ""))
        return str(getattr(hit, "page_content", hit))

    formatted_search_docs = "\n\n---\n\n".join(
        f"\n{_content(hit)}\n" for hit in search_results
    )
    return {"web_results": formatted_search_docs}
@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "arvix_results". Its value is one string
        holding the first 1000 characters of every loaded document, with a
        "---" separator line between consecutive documents.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Only the first 1000 characters of each document are kept so the
    # combined result stays short.
    formatted_search_docs = "\n\n---\n\n".join(
        f"\n{doc.page_content[:1000]}\n" for doc in search_docs
    )
    return {"arvix_results": formatted_search_docs}
@tool
def search_metadata(query: str) -> str:
    """Search through metadata.jsonl file for matching questions and answers.

    Every line of the file is parsed as one JSON record. The record's
    "Question" is compared case-insensitively with the query using
    difflib's SequenceMatcher ratio, and records scoring above 0.6 are kept.
    Lines that are not valid JSON, or that lack the expected fields, are
    skipped.

    Args:
        query: The search query to match against questions in the metadata file.

    Returns:
        The three most similar records (fewer if fewer matched), most similar
        first, each rendered as "Question / Answer / Steps" lines and joined
        by a "---" separator line. If nothing clears the threshold, the
        string "No matching results found in metadata." is returned.

    Raises:
        OSError: if metadata.jsonl cannot be opened; this is not caught here.
    """
    import json

    similarity_threshold = 0.6  # minimum ratio for a record to count as a match
    max_results = 3  # number of best matches included in the output

    # Lower-case the query once instead of once per line.
    query_lower = query.lower()

    results = []
    with open("metadata.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
                question = data["Question"]
                answer = data["Final answer"]
                steps = data["Annotator Metadata"]["Steps"]
                similarity = SequenceMatcher(
                    None, query_lower, question.lower()
                ).ratio()
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
                # Not valid JSON, not a JSON object, or missing/ill-typed
                # fields: ignore this line rather than abort the search.
                continue

            if similarity > similarity_threshold:
                results.append({
                    "question": question,
                    "answer": answer,
                    "steps": steps,
                    "similarity": similarity,
                })

    if not results:
        return "No matching results found in metadata."

    # Best matches first; the sort is stable, so ties keep file order.
    results.sort(key=lambda r: r["similarity"], reverse=True)
    formatted_results = "\n\n---\n\n".join(
        f"Question: {r['question']}\nAnswer: {r['answer']}\nSteps: {r['steps']}"
        for r in results[:max_results]
    )
    return formatted_results