Commit 82a29b2 · 1 parent: 2c93726

dynamic api cache of openrouter free llms

Files changed:
- .gitignore (+1, -2)
- README.md (+4, -3)
- pipeline/llm_service.py (+115, -8)
.gitignore
CHANGED

@@ -1,4 +1,3 @@
 venv
 __pycache__
-
-tibetan_collation_prototype
+docker
README.md
CHANGED

@@ -80,9 +80,10 @@ The Tibetan Text Metrics project provides quantitative methods for assessing tex
 
 The application includes an "Interpret Results" button that provides scholarly insights about your text similarity metrics. This feature:
 
-1.
-2. Requires an OpenRouter API key (set via environment variable)
-3.
+1. **Dynamic model selection**: Automatically discovers available free models from OpenRouter (Qwen, Google Gemma, Meta Llama, Mistral, DeepSeek)
+2. Requires an OpenRouter API key (set via environment variable `OPENROUTER_API_KEY`)
+3. Falls back to rule-based analysis if no API key is provided or all models fail
+4. The AI will provide a comprehensive scholarly analysis including:
    - Introduction explaining the texts compared and general observations
    - Overall patterns across all chapters with visualized trends
    - Detailed examination of notable chapters (highest/lowest similarity)
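A minimal sketch of the key/fallback behavior described in items 2 and 3 above (hypothetical usage; assumes the module is importable as `pipeline.llm_service`, matching the file path in this commit):

```python
import os

# Placeholder key for illustration only, not a real credential.
os.environ["OPENROUTER_API_KEY"] = "sk-or-v1-..."

from pipeline.llm_service import LLMService

service = LLMService()  # reads OPENROUTER_API_KEY from the environment
# With a key set, the LLM path is used; with no key (or if every model
# fails), the service falls back to rule-based analysis per step 3.
print(bool(service.api_key))
```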
pipeline/llm_service.py
CHANGED

@@ -26,16 +26,115 @@ except ImportError:
 
 # Constants
 DEFAULT_MAX_TOKENS = 4000
-# Updated December 2024 - current free models on OpenRouter
-PREFERRED_MODELS = [
-    "meta-llama/llama-4-maverick:free",  # 400B MoE, best quality
-    "deepseek/deepseek-chat-v3-0324:free",  # Good for dialogue/analysis
-    "mistralai/mistral-small-3.1-24b-instruct:free",  # 24B, good balance
-    "nousresearch/deephermes-3-llama-3-8b-preview:free",  # 8B fallback
-]
 DEFAULT_TEMPERATURE = 0.3
 DEFAULT_TOP_P = 0.9
 
+# Fallback models if dynamic fetch fails
+FALLBACK_MODELS = [
+    "qwen/qwen3-235b-a22b:free",
+    "google/gemma-3-27b-it:free",
+    "meta-llama/llama-3.3-70b-instruct:free",
+    "mistralai/mistral-nemo:free",
+]
+
+# Cache for dynamically fetched models
+_cached_free_models = None
+_cache_timestamp = 0
+CACHE_TTL_SECONDS = 3600  # Refresh model list every hour
+
+
+# Preferred providers for text analysis (in priority order)
+PREFERRED_PROVIDERS = ["qwen", "google", "meta-llama", "mistralai", "deepseek"]
+
+# Skip models with these keywords (vision, embedding, code-specific, etc.)
+SKIP_KEYWORDS = ["vision", "vl", "embed", "guard", "coder", "code", "image", "audio", "video"]
+
+
+def fetch_free_models(max_models: int = 5) -> list:
+    """
+    Fetch available free text models from OpenRouter API.
+    Returns a curated list of model IDs from preferred providers.
+
+    Args:
+        max_models: Maximum number of models to return (default: 5)
+    """
+    global _cached_free_models, _cache_timestamp
+    import time
+
+    # Return cached models if still valid
+    current_time = time.time()
+    if _cached_free_models and (current_time - _cache_timestamp) < CACHE_TTL_SECONDS:
+        return _cached_free_models
+
+    try:
+        logger.info("Fetching available free models from OpenRouter...")
+        response = requests.get(
+            "https://openrouter.ai/api/v1/models",
+            timeout=10
+        )
+        response.raise_for_status()
+        models_data = response.json().get("data", [])
+
+        # Filter for free text-only models from preferred providers
+        free_models = []
+        for model in models_data:
+            model_id = model.get("id", "")
+            pricing = model.get("pricing", {})
+
+            # Check if model is free (prompt and completion both $0)
+            prompt_price = float(pricing.get("prompt", "1") or "1")
+            completion_price = float(pricing.get("completion", "1") or "1")
+
+            if prompt_price != 0 or completion_price != 0:
+                continue
+
+            # Skip vision, embedding, code-specific models
+            model_lower = model_id.lower()
+            if any(skip in model_lower for skip in SKIP_KEYWORDS):
+                continue
+
+            # Check if from a preferred provider
+            provider = model_id.split("/")[0] if "/" in model_id else ""
+            if provider not in PREFERRED_PROVIDERS:
+                continue
+
+            # Get context length and provider priority for sorting
+            context_length = model.get("context_length", 0)
+            provider_priority = PREFERRED_PROVIDERS.index(provider) if provider in PREFERRED_PROVIDERS else 99
+
+            free_models.append({
+                "id": model_id,
+                "context_length": context_length,
+                "provider_priority": provider_priority
+            })
+
+        # Sort by provider priority first, then by context length
+        free_models.sort(key=lambda x: (x["provider_priority"], -x["context_length"]))
+
+        # Take one model per provider to ensure diversity
+        seen_providers = set()
+        selected_models = []
+        for model in free_models:
+            provider = model["id"].split("/")[0]
+            if provider not in seen_providers:
+                selected_models.append(model["id"])
+                seen_providers.add(provider)
+            if len(selected_models) >= max_models:
+                break
+
+        if selected_models:
+            logger.info(f"Selected {len(selected_models)} free models: {selected_models}")
+            _cached_free_models = selected_models
+            _cache_timestamp = current_time
+            return _cached_free_models
+        else:
+            logger.warning("No suitable free models found, using fallback list")
+            return FALLBACK_MODELS
+
+    except Exception as e:
+        logger.warning(f"Failed to fetch models from OpenRouter: {e}. Using fallback list.")
+        return FALLBACK_MODELS
+
 class LLMService:
     """
     Service for analyzing text similarity metrics using LLMs and rule-based methods.
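A short usage sketch of the new `fetch_free_models` and its module-level cache (hypothetical session; assumes the `requests` and `logger` names the function uses are imported earlier in the file, which this hunk does not show):

```python
from pipeline.llm_service import fetch_free_models

ids = fetch_free_models(max_models=3)  # first call: GET https://openrouter.ai/api/v1/models
ids_again = fetch_free_models()        # within CACHE_TTL_SECONDS: served from the module cache
assert ids == ids_again                # same list, no second network request (assuming the first call succeeded)
print(ids)  # at most one ":free" model id per preferred provider
```

Two quirks of the code as written are worth noting: the cache ignores `max_models`, so a later call with a different limit still returns the first cached list until the hour-long TTL expires; and `float(pricing.get("prompt", "1") or "1")` deliberately treats missing or empty pricing as paid, so only models explicitly priced at $0 pass the filter.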
@@ -49,9 +148,17 @@ class LLMService:
             api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
         """
         self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
-        self.models = PREFERRED_MODELS
         self.temperature = DEFAULT_TEMPERATURE
         self.top_p = DEFAULT_TOP_P
+        # Models are fetched dynamically when needed
+        self._models = None
+
+    @property
+    def models(self) -> list:
+        """Lazily fetch and cache available free models."""
+        if self._models is None:
+            self._models = fetch_free_models()
+        return self._models
 
     def analyze_similarity(
         self,
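And the new lazy `models` property in use (hypothetical session; placeholder key):

```python
from pipeline.llm_service import LLMService

service = LLMService(api_key="sk-or-v1-...")
first = service.models   # first access: triggers fetch_free_models()
second = service.models  # later accesses reuse self._models
assert first is second   # same list object; no refetch within the instance
```

Replacing the hardcoded `self.models = PREFERRED_MODELS` with this property means a stale model list can no longer break the service at import time: the list is resolved on first use and refreshed hourly via the module cache.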