daniel-wojahn committed on
Commit
82a29b2
·
1 Parent(s): 2c93726

Dynamic API cache of OpenRouter free LLMs

Browse files
Files changed (3) hide show
  1. .gitignore +1 -2
  2. README.md +4 -3
  3. pipeline/llm_service.py +115 -8
.gitignore CHANGED
@@ -1,4 +1,3 @@
1
  venv
2
  __pycache__
3
- academic_article.md
4
- tibetan_collation_prototype
 
1
  venv
2
  __pycache__
3
+ docker
 
README.md CHANGED
@@ -80,9 +80,10 @@ The Tibetan Text Metrics project provides quantitative methods for assessing tex
80
 
81
  The application includes an "Interpret Results" button that provides scholarly insights about your text similarity metrics. This feature:
82
 
83
- 1. Uses a selection of free OpenRouter models to analyze your results
84
- 2. Requires an OpenRouter API key (set via environment variable)
85
- 3. The AI will provide a comprehensive scholarly analysis including:
 
86
  - Introduction explaining the texts compared and general observations
87
  - Overall patterns across all chapters with visualized trends
88
  - Detailed examination of notable chapters (highest/lowest similarity)
 
80
 
81
  The application includes an "Interpret Results" button that provides scholarly insights about your text similarity metrics. This feature:
82
 
83
+ 1. **Dynamic model selection**: Automatically discovers available free models from OpenRouter (Qwen, Google Gemma, Meta Llama, Mistral, DeepSeek)
84
+ 2. Requires an OpenRouter API key (set via environment variable `OPENROUTER_API_KEY`)
85
+ 3. Falls back to rule-based analysis if no API key is provided or all models fail
86
+ 4. The AI will provide a comprehensive scholarly analysis including:
87
  - Introduction explaining the texts compared and general observations
88
  - Overall patterns across all chapters with visualized trends
89
  - Detailed examination of notable chapters (highest/lowest similarity)
pipeline/llm_service.py CHANGED
@@ -26,16 +26,115 @@ except ImportError:
26
 
27
  # Constants
28
  DEFAULT_MAX_TOKENS = 4000
29
- # Updated December 2024 - current free models on OpenRouter
30
- PREFERRED_MODELS = [
31
- "meta-llama/llama-4-maverick:free", # 400B MoE, best quality
32
- "deepseek/deepseek-chat-v3-0324:free", # Good for dialogue/analysis
33
- "mistralai/mistral-small-3.1-24b-instruct:free", # 24B, good balance
34
- "nousresearch/deephermes-3-llama-3-8b-preview:free", # 8B fallback
35
- ]
36
  DEFAULT_TEMPERATURE = 0.3
37
  DEFAULT_TOP_P = 0.9
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  class LLMService:
40
  """
41
  Service for analyzing text similarity metrics using LLMs and rule-based methods.
@@ -49,9 +148,17 @@ class LLMService:
49
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
50
  """
51
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
52
- self.models = PREFERRED_MODELS
53
  self.temperature = DEFAULT_TEMPERATURE
54
  self.top_p = DEFAULT_TOP_P
 
 
 
 
 
 
 
 
 
55
 
56
  def analyze_similarity(
57
  self,
 
26
 
27
  # Constants
28
  DEFAULT_MAX_TOKENS = 4000
 
 
 
 
 
 
 
29
  DEFAULT_TEMPERATURE = 0.3
30
  DEFAULT_TOP_P = 0.9
31
 
32
+ # Fallback models if dynamic fetch fails
33
+ FALLBACK_MODELS = [
34
+ "qwen/qwen3-235b-a22b:free",
35
+ "google/gemma-3-27b-it:free",
36
+ "meta-llama/llama-3.3-70b-instruct:free",
37
+ "mistralai/mistral-nemo:free",
38
+ ]
39
+
40
+ # Cache for dynamically fetched models
41
+ _cached_free_models = None
42
+ _cache_timestamp = 0
43
+ CACHE_TTL_SECONDS = 3600 # Refresh model list every hour
44
+
45
+
46
+ # Preferred providers for text analysis (in priority order)
47
+ PREFERRED_PROVIDERS = ["qwen", "google", "meta-llama", "mistralai", "deepseek"]
48
+
49
+ # Skip models with these keywords (vision, embedding, code-specific, etc.)
50
+ SKIP_KEYWORDS = ["vision", "vl", "embed", "guard", "coder", "code", "image", "audio", "video"]
51
+
52
+
53
def fetch_free_models(max_models: int = 5) -> list:
    """
    Fetch available free text models from the OpenRouter API.

    Successful results are cached at module level for CACHE_TTL_SECONDS
    so repeated calls within the TTL do not hit the network. Failures
    (network error, bad JSON, or an empty result) fall back to the
    static FALLBACK_MODELS list, and that fallback is deliberately NOT
    cached so the next call retries the live API.

    Args:
        max_models: Maximum number of model IDs to return (default: 5).

    Returns:
        A list of model ID strings, at most one per preferred provider,
        ordered by provider priority.
    """
    global _cached_free_models, _cache_timestamp
    import time

    # Serve from the module-level cache while it is still fresh.
    now = time.time()
    if _cached_free_models and (now - _cache_timestamp) < CACHE_TTL_SECONDS:
        return _cached_free_models

    try:
        logger.info("Fetching available free models from OpenRouter...")
        response = requests.get(
            "https://openrouter.ai/api/v1/models",
            timeout=10
        )
        response.raise_for_status()
        models_data = response.json().get("data", [])

        # Filter for free text-only models from preferred providers.
        free_models = []
        for model in models_data:
            model_id = model.get("id", "")
            pricing = model.get("pricing", {})

            # A model is free only if prompt AND completion both cost $0.
            # Missing or empty pricing defaults to "1" so it counts as paid.
            prompt_price = float(pricing.get("prompt", "1") or "1")
            completion_price = float(pricing.get("completion", "1") or "1")
            if prompt_price != 0 or completion_price != 0:
                continue

            # Skip vision, embedding, code-specific, etc. models by keyword.
            model_lower = model_id.lower()
            if any(skip in model_lower for skip in SKIP_KEYWORDS):
                continue

            # Only keep models from the curated provider list.
            provider = model_id.split("/")[0] if "/" in model_id else ""
            if provider not in PREFERRED_PROVIDERS:
                continue

            # `provider` is guaranteed to be in PREFERRED_PROVIDERS here
            # (the continue above filtered everything else), so index()
            # cannot raise — the original `else 99` branch was dead code.
            free_models.append({
                "id": model_id,
                "context_length": model.get("context_length", 0),
                "provider_priority": PREFERRED_PROVIDERS.index(provider)
            })

        # Best providers first; within a provider, largest context first.
        free_models.sort(key=lambda x: (x["provider_priority"], -x["context_length"]))

        # Take one model per provider to ensure diversity.
        seen_providers = set()
        selected_models = []
        for model in free_models:
            provider = model["id"].split("/")[0]
            if provider not in seen_providers:
                selected_models.append(model["id"])
                seen_providers.add(provider)
                if len(selected_models) >= max_models:
                    break

        if selected_models:
            logger.info(f"Selected {len(selected_models)} free models: {selected_models}")
            _cached_free_models = selected_models
            _cache_timestamp = now
            return _cached_free_models
        else:
            logger.warning("No suitable free models found, using fallback list")
            return FALLBACK_MODELS

    except Exception as e:
        # Network, HTTP-status, JSON, or price-parsing errors all degrade
        # to the static fallback list rather than crashing the caller.
        logger.warning(f"Failed to fetch models from OpenRouter: {e}. Using fallback list.")
        return FALLBACK_MODELS

138
  class LLMService:
139
  """
140
  Service for analyzing text similarity metrics using LLMs and rule-based methods.
 
148
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
149
  """
150
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
 
151
  self.temperature = DEFAULT_TEMPERATURE
152
  self.top_p = DEFAULT_TOP_P
153
+ # Models are fetched dynamically when needed
154
+ self._models = None
155
+
156
+ @property
157
+ def models(self) -> list:
158
+ """Lazily fetch and cache available free models."""
159
+ if self._models is None:
160
+ self._models = fetch_free_models()
161
+ return self._models
162
 
163
  def analyze_similarity(
164
  self,