Spaces:
Runtime error
Runtime error
| '''Tool functions for MCP server''' | |
| import os | |
| import threading | |
| import time | |
| import json | |
| import logging | |
| import queue | |
| from typing import Tuple | |
| from upstash_vector import Index | |
| from upstash_redis import Redis | |
| import functions.feed_extraction as extraction_funcs | |
| import functions.summarization as summarization_funcs | |
| import functions.rag as rag_funcs | |
| RAG_INGEST_QUEUE = queue.Queue() | |
| rag_ingest_thread = threading.Thread( | |
| target=rag_funcs.ingest, | |
| args=(RAG_INGEST_QUEUE,) | |
| ) | |
| rag_ingest_thread.start() | |
| def get_feed(website: str, n: int = 3) -> list: | |
| '''Gets RSS feed content from a given website. Can take a website or RSS | |
| feed URL directly, or the name of a website. Will attempt to find RSS | |
| feed and return title, summary and link to full article for most recent | |
| n items in feed. This function is slow a resource heavy, only call it when | |
| the user wants to check a feed for new content, or asks for content from a | |
| feed that you have not retrieved yet. | |
| Args: | |
| website: URL or name of website to extract RSS feed content from | |
| n: (optional) number of articles to parse from feed, defaults to 3 | |
| Returns: | |
| JSON string containing the feed content or 'No feed found' if a RSS | |
| feed for the requested website could not be found | |
| ''' | |
| start_time = time.time() | |
| logger = logging.getLogger(__name__ + '.get_feed()') | |
| logger.info('Getting feed content for: %s', website) | |
| # Find the feed's URI from the website name/URL | |
| feed_uri = extraction_funcs.find_feed_uri(website) | |
| logger.info('find_feed_uri() returned %s', feed_uri) | |
| if 'No feed found' in feed_uri: | |
| logger.info('Completed in %s seconds', round(time.time()-start_time, 2)) | |
| return 'No feed found' | |
| # Parse and extract content from the feed | |
| articles = extraction_funcs.parse_feed(feed_uri, n) | |
| logger.info('parse_feed() returned %s entries', len(list(articles.keys()))) | |
| # Loop on the posts, sending them to RAG (nonblocking) and summarization (blocking) | |
| for i, item in articles.items(): | |
| # Check if content is present | |
| if item['content'] is not None: | |
| logger.info('Summarizing/RAG ingesting: %s', item) | |
| # Send to RAG ingest | |
| RAG_INGEST_QUEUE.put(item.copy()) | |
| logger.info('"%s" sent to RAG ingest', item['title']) | |
| # Generate summary and add to content | |
| summary = summarization_funcs.summarize_content( | |
| item['title'], | |
| item['content'] | |
| ) | |
| articles[i]['summary'] = summary | |
| logger.info('Summary of "%s" generated', item['title']) | |
| # Remove full-text content before returning | |
| articles[i].pop('content', None) | |
| logger.info('Completed in %s seconds', round(time.time()-start_time, 2)) | |
| # Return content dictionary as string | |
| return json.dumps(articles) | |
| def context_search(query: str, article_title: str = None) -> list[Tuple[float, str]]: | |
| '''Searches for context relevant to query in article vector store. | |
| Use this Function to search for additional information before | |
| answering the user's question about an article. If article_title is | |
| provided the search will only return results from that article. If | |
| article_title is omitted, the search will include all articles | |
| currently in the cache. | |
| Ags: | |
| query: user query to find context for | |
| article_title: optional, use this argument to search only for | |
| context from a specific context, defaults to None | |
| Returns: | |
| List of tuples with the following format: [(relevance score, 'context string')] | |
| ''' | |
| logger = logging.getLogger(__name__ + 'context_search') | |
| index = Index( | |
| url='https://living-whale-89944-us1-vector.upstash.io', | |
| token=os.environ['UPSTASH_VECTOR_KEY'] | |
| ) | |
| results = None | |
| results = index.query( | |
| data=query, | |
| top_k=3, | |
| include_data=True, | |
| namespace=article_title | |
| ) | |
| logger.info('Retrieved %s chunks for "%s"', len(results), query) | |
| contexts = [] | |
| for result in results: | |
| contexts.append((result.score, result.data)) | |
| return contexts | |
| def find_article(query: str) -> list[Tuple[float, str]]: | |
| '''Uses vector search to find the most likely title of the article | |
| referred to by query. Use this function if the user is asking about | |
| and article, but it is unclear which article. | |
| Args: | |
| query: query to to find source article for | |
| Returns: | |
| List of tuples of most likely context article titles in the following format: | |
| [(relevance score, 'article title')] | |
| ''' | |
| logger = logging.getLogger(__name__ + 'context_search') | |
| index = Index( | |
| url='https://living-whale-89944-us1-vector.upstash.io', | |
| token=os.environ['UPSTASH_VECTOR_KEY'] | |
| ) | |
| results = None | |
| results = index.query( | |
| data=query, | |
| top_k=3, | |
| include_metadata=True, | |
| include_data=True | |
| ) | |
| logger.info('Retrieved %s chunks for "%s"', len(results), query) | |
| contexts = [] | |
| for result in results: | |
| contexts.append((result.score, result.metadata['namespace'])) | |
| return contexts | |
| def get_summary(title: str) -> str: | |
| '''Uses article title to get summary of article content. | |
| Args: | |
| title: title of article to get summary for | |
| Returns: | |
| Short summary of article content. Returns "No summary found" | |
| if summary does not exist. | |
| ''' | |
| logger = logging.getLogger(__name__ + '.get_summary()') | |
| redis = Redis( | |
| url='https://sensible-midge-19304.upstash.io', | |
| token=os.environ['UPSTASH_REDIS_KEY'] | |
| ) | |
| cache_key = f'{title} summary' | |
| summary = redis.get(cache_key) | |
| if summary: | |
| logger.info('Got summary for "%s": %s', title, summary[:100]) | |
| return summary | |
| logger.info('Could not find summary for: "%s"', title) | |
| return 'No summary found' | |