Final_Assignment_Template

Sleeping

File size: 4,236 Bytes

552b51a

from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from difflib import SequenceMatcher

@tool
def add(a: float, b: float) -> float:
    """
    Return the sum of two numbers.

    Args:
        a (float): the left-hand addend
        b (float): the right-hand addend
    Returns:
        float: the value of a + b
    """
    total = a + b
    return total

@tool
def subtract(a: float, b: float) -> float:
    """
    Return the difference of two numbers.

    Args:
        a (float): the number to subtract from
        b (float): the number to take away
    Returns:
        float: the value of a - b
    """
    difference = a - b
    return difference
    
@tool
def multiply(a: float, b: float) -> float:
    """
    Return the product of two numbers.

    Args:
        a (float): the first factor
        b (float): the second factor
    Returns:
        float: the value of a * b
    """
    product = a * b
    return product

@tool
def divide(a: float, b: float) -> float:
    """
    Divides two numbers.

    Args:
        a (float): the dividend
        b (float): the divisor; must not be zero
    Returns:
        float: the quotient a / b
    Raises:
        ValueError: if b is zero
    """
    # Reject a zero divisor up front so the caller gets a ValueError with a
    # readable message instead of Python's ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b

@tool
def modulus(a: int, b: int) -> int:
    """
    Return the remainder of dividing one number by another.

    Args:
        a (int): the dividend
        b (int): the divisor
    Returns:
        int: the value of a % b
    Raises:
        ZeroDivisionError: if b is zero (raised by the % operator itself)
    """
    remainder = a % b
    return remainder

@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return at most 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "wiki_results". Its value is one string in
        which each loaded page is wrapped in a <Document> element and pages
        are separated by "---" lines.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Use .get() for metadata so a page without a "source"/"page" entry is
    # rendered with an empty attribute instead of aborting the whole search.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}

@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "web_results". Its value is one string in
        which each hit is wrapped in a <Document> element and hits are
        separated by "---" lines.
    """
    # invoke() takes the tool input as its first positional argument; passing
    # only a `query=` keyword leaves that required argument unset.
    search_results = TavilySearchResults(max_results=3).invoke({"query": query})

    # NOTE(review): on an API failure the Tavily tool appears to return the
    # error text as a string rather than a list of hits -- pass it through
    # unchanged so the caller can see what went wrong. Confirm against the
    # installed langchain_community version.
    if isinstance(search_results, str):
        return {"web_results": search_results}

    # Each hit is a plain dict (keys such as "url" and "content"), not a
    # Document object, so read fields by key. The empty page attribute keeps
    # the output shaped like the other search tools in this file.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{hit.get("url", "")}" page=""/>\n{hit.get("content", "")}\n</Document>'
        for hit in search_results
    )
    return {"web_results": formatted_search_docs}

@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "arvix_results". Its value is one string
        in which each paper is wrapped in a <Document> element (content
        truncated to the first 1000 characters) and papers are separated by
        "---" lines.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    def _source(doc):
        # NOTE(review): ArxivLoader metadata appears to carry "Title",
        # "Authors", "Published" and "Summary" but no "source" key by
        # default, so indexing ["source"] raises KeyError. Prefer "source"
        # when present and fall back to the paper title -- confirm against
        # the installed langchain_community version.
        return doc.metadata.get("source") or doc.metadata.get("Title", "")

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{_source(doc)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    return {"arvix_results": formatted_search_docs}

@tool
def search_metadata(query: str) -> str:
    """Search through metadata.jsonl file for matching questions and answers.

    Each line of "metadata.jsonl" (resolved against the current working
    directory) is parsed as one JSON record. Records whose "Question" text
    has a SequenceMatcher ratio above 0.6 against the query (compared
    case-insensitively) are kept, and the 3 most similar are returned.

    Args:
        query: The search query to match against questions in the metadata file.

    Returns:
        The matching records formatted as "Question / Answer / Steps" blocks
        separated by "---" lines, or a fixed "no results" message when
        nothing clears the similarity threshold.

    Raises:
        OSError: if metadata.jsonl cannot be opened (e.g. it does not exist).
    """
    import json

    # Lower-case the query once instead of once per record.
    needle = query.lower()

    results = []
    with open("metadata.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
                question = data["Question"]
                similarity = SequenceMatcher(None, needle, question.lower()).ratio()
                if similarity <= 0.6:  # Threshold for similarity
                    continue
                results.append({
                    "question": question,
                    "answer": data["Final answer"],
                    "steps": data["Annotator Metadata"]["Steps"],
                    "similarity": similarity,
                })
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
                # Skip unusable records the same way as unparsable lines:
                # blank/invalid JSON, missing keys, or values of the wrong
                # shape must not abort the search over the remaining lines.
                continue

    if not results:
        return "No matching results found in metadata."

    # Best matches first; only the top 3 are reported.
    results.sort(key=lambda x: x["similarity"], reverse=True)

    formatted_results = "\n\n---\n\n".join(
        f"Question: {r['question']}\nAnswer: {r['answer']}\nSteps: {r['steps']}"
        for r in results[:3]
    )
    return formatted_results