Commit
·
91e1308
1
Parent(s):
58b869c
refactor SearchProvider and its subclasses for improved readability and consistency
Browse files
app.py
CHANGED
@@ -270,56 +270,99 @@ class CacheManager:
|
|
270 |
def __contains__(self, key):
    """Return True when ``key`` is cached and its entry has not outlived ``ttl``.

    A key with no recorded timestamp is aged from 0, i.e. measured against
    the full current ``time.time()`` value.
    """
    if key not in self._cache:
        return False
    entry_age = time.time() - self._timestamps.get(key, 0)
    return entry_age < self.ttl
|
271 |
|
272 |
class SearchProvider(ABC):
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
@property @abstractmethod
|
279 |
-
def provider_name(self) -> str: pass
|
280 |
@abstractmethod
|
281 |
-
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
|
|
|
|
282 |
def search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
|
292 |
class GoogleProvider(SearchProvider):
|
293 |
-
@property
|
|
|
|
|
|
|
294 |
def __init__(self, config_dict: Dict):
|
295 |
super().__init__(config_dict)
|
296 |
self._api_key = self.provider_config.get("google_api_key")
|
297 |
self._cse_id = self.provider_config.get("google_cse_id")
|
298 |
self._timeout = self.provider_config.get("google_timeout", 8)
|
299 |
-
if self._api_key and self._cse_id:
|
300 |
-
|
|
|
|
|
|
|
|
|
|
|
301 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
302 |
try:
|
303 |
-
params = {
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
312 |
class TavilyProvider(SearchProvider):
|
313 |
-
@property
|
|
|
|
|
|
|
314 |
def __init__(self, config_dict: Dict):
|
315 |
super().__init__(config_dict)
|
316 |
self._api_key = self.provider_config.get("tavily_api_key")
|
317 |
self._search_depth = self.provider_config.get("tavily_depth", "basic")
|
318 |
if self._api_key and TavilyClient:
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
|
|
323 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
324 |
if not self._enabled: return None
|
325 |
try:
|
@@ -330,7 +373,9 @@ class TavilyProvider(SearchProvider):
|
|
330 |
except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None
|
331 |
|
332 |
class DuckDuckGoProvider(SearchProvider):
|
333 |
-
@property
|
|
|
|
|
334 |
def __init__(self, config_dict: Dict):
|
335 |
super().__init__(config_dict)
|
336 |
if DDGS:
|
|
|
270 |
def __contains__(self, key):
    """Return True when ``key`` is cached and its entry has not outlived ``ttl``.

    A key with no recorded timestamp is aged from 0, i.e. measured against
    the full current ``time.time()`` value.
    """
    if key not in self._cache:
        return False
    entry_age = time.time() - self._timestamps.get(key, 0)
    return entry_age < self.ttl
|
271 |
|
272 |
class SearchProvider(ABC):
    """Abstract base for search backends gated by an enable flag and a call quota.

    Subclasses supply ``provider_name`` and ``_perform_search``. Callers use
    ``search``, which checks the enable flag and the quota, logs the request,
    and then delegates to ``_perform_search``.
    """

    def __init__(self, config_dict: Dict):
        """Set up the state that ``search`` and the subclasses read.

        Subclasses call ``super().__init__(config_dict)`` and then read
        ``self.provider_config``; without a constructor on this class that
        call reaches ``object.__init__`` and raises ``TypeError``.

        Args:
            config_dict: Provider settings, stored as ``provider_config``.
        """
        # NOTE(review): subclasses read flat keys (e.g. "google_api_key") from
        # provider_config; whether the caller passes that mapping directly or
        # a larger config containing it is not visible here -- confirm.
        self.provider_config = config_dict if config_dict is not None else {}
        # Subclasses flip this to True once their credentials are present.
        self._enabled = False
        self._quota_used = 0
        # NOTE(review): no quota setting is read because its config key is not
        # visible in this view; the limit therefore defaults to unlimited.
        self._quota_limit = float('inf')

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Name of the backend, used as the prefix of every log line."""

    @abstractmethod
    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
        """Run the backend query.

        Returns:
            A list of result dicts, or ``None`` when the search could not be
            performed.
        """

    def search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
        """Run ``query`` through the backend if it is enabled and within quota.

        Args:
            query: Search text; only its first 70 characters are logged.
            max_results: Forwarded unchanged to ``_perform_search``.

        Returns:
            Whatever ``_perform_search`` returns, or ``None`` when the
            provider is disabled or its quota is exhausted.
        """
        if not self._enabled:
            gaia_logger.debug(f"[{self.provider_name}] Skip: Not enabled.")
            return None

        metered = self._quota_limit != float('inf')
        if metered and self._quota_used >= self._quota_limit:
            gaia_logger.warning(f"[{self.provider_name}] Skip: Quota ({self._quota_used}/{int(self._quota_limit)})")
            return None

        usage_str = ""
        if metered:
            # The call is charged before it runs, so failed searches also
            # count against the quota.
            self._quota_used += 1
            usage_str = f"({self._quota_used}/{int(self._quota_limit)}) "
        gaia_logger.info(f"[{self.provider_name}] {usage_str}Search: '{query[:70]}...'")
        return self._perform_search(query, max_results)

    def available(self) -> bool:
        """Return the enable flag set by the concrete provider."""
        return self._enabled
|
298 |
|
299 |
class GoogleProvider(SearchProvider):
    """Search backend that queries the Google Custom Search JSON API."""

    # The Custom Search JSON API rejects ``num`` values outside 1..10.
    _MAX_RESULTS_PER_REQUEST = 10

    @property
    def provider_name(self) -> str:
        """Name used as the log prefix for this backend."""
        return "Google"

    def __init__(self, config_dict: Dict):
        """Read the API credentials and enable the provider if both are set.

        Args:
            config_dict: Passed to the base constructor, which exposes the
                settings as ``self.provider_config``.
        """
        super().__init__(config_dict)
        self._api_key = self.provider_config.get("google_api_key")
        self._cse_id = self.provider_config.get("google_cse_id")
        self._timeout = self.provider_config.get("google_timeout", 8)
        if self._api_key and self._cse_id:
            self._enabled = True
            gaia_logger.info(f"β {self.provider_name} API configured.")
        else:
            self._enabled = False
            gaia_logger.warning(f"β {self.provider_name} API key/CSE ID missing in RAG config.")

    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
        """Query the Custom Search endpoint and normalise the hits.

        Args:
            query: Search text.
            max_results: Desired number of hits; clamped to the 1..10 range
                the API accepts so larger requests no longer fail with HTTP 400.

        Returns:
            A list of ``{'href', 'title', 'body'}`` dicts (empty when the API
            reports no items), or ``None`` on timeout, request error, or any
            other exception.
        """
        try:
            num = max(1, min(max_results, self._MAX_RESULTS_PER_REQUEST))
            params = {
                'key': self._api_key,
                'cx': self._cse_id,
                'q': query,
                'num': num,
                'safe': 'active',
            }
            response = requests.get(
                "https://www.googleapis.com/customsearch/v1",
                params=params,
                timeout=self._timeout,
            )
            response.raise_for_status()
            data = response.json()
            items = data.get('items', [])
            if not items:
                gaia_logger.info(f"[{self.provider_name}] No results for '{query[:70]}'")
                return []
            return [
                {
                    'href': item.get('link'),
                    'title': item.get('title', ''),
                    'body': item.get('snippet', ''),
                }
                for item in items
            ]
        except requests.exceptions.Timeout:
            gaia_logger.warning(f"[{self.provider_name}] Timeout: '{query[:70]}'")
            return None
        except requests.exceptions.RequestException as e:
            gaia_logger.warning(f"[{self.provider_name}] RequestEx: '{query[:70]}': {e}")
            return None
        except Exception as e:
            # Last-resort guard (e.g. a non-JSON body); logged with traceback.
            gaia_logger.error(f"[{self.provider_name}] Error: '{query[:70]}': {e}", exc_info=True)
            return None
|
350 |
|
351 |
class TavilyProvider(SearchProvider):
|
352 |
+
@property
|
353 |
+
def provider_name(self) -> str:
|
354 |
+
return "Tavily"
|
355 |
+
|
356 |
def __init__(self, config_dict: Dict):
|
357 |
super().__init__(config_dict)
|
358 |
self._api_key = self.provider_config.get("tavily_api_key")
|
359 |
self._search_depth = self.provider_config.get("tavily_depth", "basic")
|
360 |
if self._api_key and TavilyClient:
|
361 |
+
self._enabled = True
|
362 |
+
gaia_logger.info(f"β {self.provider_name} API configured.")
|
363 |
+
else:
|
364 |
+
self._enabled = False
|
365 |
+
gaia_logger.warning(f"β {self.provider_name} API key missing or TavilyClient not available in config.")
|
366 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
367 |
if not self._enabled: return None
|
368 |
try:
|
|
|
373 |
except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None
|
374 |
|
375 |
class DuckDuckGoProvider(SearchProvider):
|
376 |
+
@property
|
377 |
+
def provider_name(self) -> str:
|
378 |
+
return "DuckDuckGo"
|
379 |
def __init__(self, config_dict: Dict):
|
380 |
super().__init__(config_dict)
|
381 |
if DDGS:
|