# NOTE: banner text captured along with this file; it is not part of the module:
#   Spaces: Runtime error / Runtime error
import json
import os
from typing import List

import requests
from bs4 import BeautifulSoup
def search_duckduckgo(query: str, max_results: int = 3) -> List[str]:
    """
    Perform a search on DuckDuckGo and extract result snippets.

    The query is sent to the HTML endpoint (html.duckduckgo.com) and the text
    of each ``.result__snippet`` element found inside a ``.result`` element is
    collected in page order.

    Args:
        query (str): The query string to search. It is URL-encoded by
            ``requests``, so reserved characters ('&', '#', '+', '%', ...)
            are sent as part of the query instead of breaking the URL.
        max_results (int): Max number of snippets to return. A value <= 0
            returns an empty list without issuing a request.

    Returns:
        List[str]: A list of result snippets (summaries). Empty when nothing
        matched or when the request/parsing failed; failures are printed and
        never raised.
    """
    print(f"[DuckDuckGo] Searching for: {query}")
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
    }
    try:
        # Checked inside the try so that a badly typed limit (e.g. a string
        # supplied by a tool caller) is reported like any other failure.
        if max_results <= 0:
            # The limit used to be tested only after appending, which leaked
            # one snippet for a limit of zero; bail out before any I/O.
            return []
        response = requests.get(
            "https://html.duckduckgo.com/html/",
            # Let requests build the query string: a bare space -> '+'
            # replacement leaves '&', '#', '%' etc. unescaped.
            params={"q": query, "kl": "wt-wt"},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        snippets: List[str] = []
        for result in soup.select(".result"):
            snippet = result.select_one(".result__snippet")
            if snippet is None:
                # Result entry without a snippet element; nothing to collect.
                continue
            snippets.append(snippet.get_text(strip=True))
            if len(snippets) >= max_results:
                break
        print(f"[DuckDuckGo] Found {len(snippets)} snippets.")
        return snippets
    except Exception as e:
        # Deliberately broad: callers get a best-effort list, so any failure
        # (network, HTTP status, parsing) is logged and reported as "no results".
        print(f"[DuckDuckGo] Error: {e}")
        return []
def search_langsearch(query: str, max_items: int = 5) -> List[str]:
    """
    Search using LangSearch API and return summarized results.

    The API token is read from the ``LS_TOKEN`` environment variable and sent
    as a Bearer token. Summaries are taken from ``data.webPages.value[*].summary``
    in the JSON response.

    Args:
        query (str): The query to search.
        max_items (int): Number of summaries to request (sent as ``count``).

    Returns:
        List[str]: A list of summarized web page results. Entries without a
        non-empty ``summary`` are skipped. Empty when the token is missing,
        when nothing usable came back, or when the request failed; failures
        are printed and never raised.
    """
    print(f"[LangSearch] Searching for: {query}")
    token = os.getenv("LS_TOKEN")
    if not token:
        print("[LangSearch] Error: LS_TOKEN environment variable not found.")
        return []
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    body = {
        "query": query,
        "freshness": "noLimit",
        "summary": True,
        "count": max_items,
    }
    try:
        response = requests.post(
            "https://api.langsearch.com/v1/web-search",
            headers=headers,
            data=json.dumps(body),
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        # Each level may be missing *or* JSON null; `.get(key, {})` only covers
        # the first case, so fall back with `or` to survive both.
        web_pages = (payload.get("data") or {}).get("webPages") or {}
        entries = web_pages.get("value") or []
        # One entry lacking "summary" must not discard the whole result set.
        summaries = [
            entry["summary"]
            for entry in entries
            if isinstance(entry, dict) and entry.get("summary")
        ]
        print(f"[LangSearch] Retrieved {len(summaries)} summaries.")
        return summaries
    except Exception as e:
        # Deliberately broad: callers get a best-effort list, so any failure
        # (network, HTTP status, malformed JSON) is logged and reported as
        # "no results".
        print(f"[LangSearch] Error: {e}")
        return []
# Dispatch table for LLM tool use: tool name -> the callable that implements it.
TOOLS_MAPPING = dict(
    search_duckduckgo=search_duckduckgo,
    search_langsearch=search_langsearch,
)
# Tool schemas for OpenAI-compatible tool calling. Both search tools share one
# shape (a required string `query` plus an optional integer limit), so the
# entries are produced by a single private builder.
def _search_tool(
    name: str,
    description: str,
    query_help: str,
    limit_name: str,
    limit_help: str,
    limit_default: int,
) -> dict:
    """Build one "function" tool entry with a `query` and an integer limit parameter."""
    query_schema = {"type": "string", "description": query_help}
    limit_schema = {
        "type": "integer",
        "description": limit_help,
        "default": limit_default,
    }
    parameters = {
        "type": "object",
        "properties": {"query": query_schema, limit_name: limit_schema},
        "required": ["query"],
    }
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


TOOLS_DEFINITION = [
    _search_tool(
        name="search_duckduckgo",
        description="Search DuckDuckGo for short result snippets related to a query.",
        query_help="Search query to send to DuckDuckGo.",
        limit_name="max_results",
        limit_help="Number of result snippets to return.",
        limit_default=3,
    ),
    _search_tool(
        name="search_langsearch",
        description="Use LangSearch API to retrieve summarized web content.",
        query_help="Text to search using LangSearch.",
        limit_name="max_items",
        limit_help="Number of summarized results to return.",
        limit_default=5,
    ),
]