File size: 4,236 Bytes
552b51a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from difflib import SequenceMatcher
@tool
def add(a: float, b: float) -> float:
"""
Adds two numbers.
Args:
a (float): the first number
b (float): the second number
"""
return a + b
@tool
def subtract(a: float, b: float) -> float:
"""
Subtracts two numbers.
Args:
a (float): the first number
b (float): the second number
"""
return a - b
@tool
def multiply(a: float, b: float) -> float:
"""
Multiplies two numbers.
Args:
a (float): the first number
b (float): the second number
"""
return a * b
@tool
def divide(a: float, b: float) -> float:
"""
Divides two numbers.
Args:
a (float): the first float number
b (float): the second float number
"""
if b == 0:
raise ValueError("Cannot divided by zero.")
return a / b
@tool
def modulus(a: int, b: int) -> int:
"""
Get the modulus of two numbers.
Args:
a (int): the first number
b (int): the second number
"""
return a % b
@tool
def wiki_search(query: str) -> str:
"""Search Wikipedia for a query and return maximum 2 results.
Args:
query: The search query."""
search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
formatted_search_docs = "\n\n---\n\n".join(
[
f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
for doc in search_docs
])
return {"wiki_results": formatted_search_docs}
@tool
def web_search(query: str) -> str:
"""Search Tavily for a query and return maximum 3 results.
Args:
query: The search query."""
search_docs = TavilySearchResults(max_results=3).invoke(query=query)
formatted_search_docs = "\n\n---\n\n".join(
[
f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
for doc in search_docs
])
return {"web_results": formatted_search_docs}
@tool
def arvix_search(query: str) -> str:
"""Search Arxiv for a query and return maximum 3 result.
Args:
query: The search query."""
search_docs = ArxivLoader(query=query, load_max_docs=3).load()
formatted_search_docs = "\n\n---\n\n".join(
[
f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
for doc in search_docs
])
return {"arvix_results": formatted_search_docs}
@tool
def search_metadata(query: str) -> str:
"""Search through metadata.jsonl file for matching questions and answers.
Args:
query: The search query to match against questions in the metadata file.
"""
import json
results = []
with open("metadata.jsonl", "r", encoding="utf-8") as f:
for line in f:
try:
data = json.loads(line)
# Calculate similarity ratio
similarity = SequenceMatcher(None, query.lower(), data["Question"].lower()).ratio()
if similarity > 0.6: # Threshold for similarity
results.append({
"question": data["Question"],
"answer": data["Final answer"],
"steps": data["Annotator Metadata"]["Steps"],
"similarity": similarity
})
except json.JSONDecodeError:
continue
if not results:
return "No matching results found in metadata."
# Sort by similarity
results.sort(key=lambda x: x["similarity"], reverse=True)
formatted_results = "\n\n---\n\n".join(
[f"Question: {r['question']}\nAnswer: {r['answer']}\nSteps: {r['steps']}"
for r in results[:3]] # Return top 3 most similar results
)
return formatted_results |