daniel-wojahn committed on
Commit
82a29b2
·
1 Parent(s): 2c93726

Dynamic API cache of OpenRouter free LLMs

Browse files
Files changed (3) hide show
  1. .gitignore +1 -2
  2. README.md +4 -3
  3. pipeline/llm_service.py +115 -8
.gitignore CHANGED
@@ -1,4 +1,3 @@
1
  venv
2
  __pycache__
3
- academic_article.md
4
- tibetan_collation_prototype
 
1
  venv
2
  __pycache__
3
+ docker
 
README.md CHANGED
@@ -80,9 +80,10 @@ The Tibetan Text Metrics project provides quantitative methods for assessing tex
80
 
81
  The application includes an "Interpret Results" button that provides scholarly insights about your text similarity metrics. This feature:
82
 
83
- 1. Uses a selection of free OpenRouter models to analyze your results
84
- 2. Requires an OpenRouter API key (set via environment variable)
85
- 3. The AI will provide a comprehensive scholarly analysis including:
 
86
  - Introduction explaining the texts compared and general observations
87
  - Overall patterns across all chapters with visualized trends
88
  - Detailed examination of notable chapters (highest/lowest similarity)
 
80
 
81
  The application includes an "Interpret Results" button that provides scholarly insights about your text similarity metrics. This feature:
82
 
83
+ 1. **Dynamic model selection**: Automatically discovers available free models from OpenRouter (Qwen, Google Gemma, Meta Llama, Mistral, DeepSeek)
84
+ 2. Requires an OpenRouter API key (set via environment variable `OPENROUTER_API_KEY`)
85
+ 3. Falls back to rule-based analysis if no API key is provided or all models fail
86
+ 4. The AI will provide a comprehensive scholarly analysis including:
87
  - Introduction explaining the texts compared and general observations
88
  - Overall patterns across all chapters with visualized trends
89
  - Detailed examination of notable chapters (highest/lowest similarity)
pipeline/llm_service.py CHANGED
@@ -26,16 +26,115 @@ except ImportError:
26
 
27
  # Constants
28
  DEFAULT_MAX_TOKENS = 4000
29
- # Updated December 2024 - current free models on OpenRouter
30
- PREFERRED_MODELS = [
31
- "meta-llama/llama-4-maverick:free", # 400B MoE, best quality
32
- "deepseek/deepseek-chat-v3-0324:free", # Good for dialogue/analysis
33
- "mistralai/mistral-small-3.1-24b-instruct:free", # 24B, good balance
34
- "nousresearch/deephermes-3-llama-3-8b-preview:free", # 8B fallback
35
- ]
36
  DEFAULT_TEMPERATURE = 0.3
37
  DEFAULT_TOP_P = 0.9
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  class LLMService:
40
  """
41
  Service for analyzing text similarity metrics using LLMs and rule-based methods.
@@ -49,9 +148,17 @@ class LLMService:
49
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
50
  """
51
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
52
- self.models = PREFERRED_MODELS
53
  self.temperature = DEFAULT_TEMPERATURE
54
  self.top_p = DEFAULT_TOP_P
 
 
 
 
 
 
 
 
 
55
 
56
  def analyze_similarity(
57
  self,
 
26
 
27
  # Constants
28
  DEFAULT_MAX_TOKENS = 4000
 
 
 
 
 
 
 
29
  DEFAULT_TEMPERATURE = 0.3
30
  DEFAULT_TOP_P = 0.9
31
 
32
+ # Fallback models if dynamic fetch fails
33
+ FALLBACK_MODELS = [
34
+ "qwen/qwen3-235b-a22b:free",
35
+ "google/gemma-3-27b-it:free",
36
+ "meta-llama/llama-3.3-70b-instruct:free",
37
+ "mistralai/mistral-nemo:free",
38
+ ]
39
+
40
+ # Cache for dynamically fetched models
41
+ _cached_free_models = None
42
+ _cache_timestamp = 0
43
+ CACHE_TTL_SECONDS = 3600 # Refresh model list every hour
44
+
45
+
46
+ # Preferred providers for text analysis (in priority order)
47
+ PREFERRED_PROVIDERS = ["qwen", "google", "meta-llama", "mistralai", "deepseek"]
48
+
49
+ # Skip models with these keywords (vision, embedding, code-specific, etc.)
50
+ SKIP_KEYWORDS = ["vision", "vl", "embed", "guard", "coder", "code", "image", "audio", "video"]
51
+
52
+
53
def fetch_free_models(max_models: int = 5) -> list:
    """
    Fetch available free text models from the OpenRouter API.

    Successful results are cached at module level for CACHE_TTL_SECONDS
    so repeated calls within the TTL do not hit the network. Failures
    (network error, bad JSON, or an empty result) fall back to the
    static FALLBACK_MODELS list, and that fallback is deliberately NOT
    cached so the next call retries the live API.

    Args:
        max_models: Maximum number of model IDs to return (default: 5).

    Returns:
        A list of model ID strings, at most one per preferred provider,
        ordered by provider priority.
    """
    global _cached_free_models, _cache_timestamp
    import time

    # Serve from the module-level cache while it is still fresh.
    now = time.time()
    if _cached_free_models and (now - _cache_timestamp) < CACHE_TTL_SECONDS:
        return _cached_free_models

    try:
        logger.info("Fetching available free models from OpenRouter...")
        response = requests.get(
            "https://openrouter.ai/api/v1/models",
            timeout=10
        )
        response.raise_for_status()
        models_data = response.json().get("data", [])

        # Filter for free text-only models from preferred providers.
        free_models = []
        for model in models_data:
            model_id = model.get("id", "")
            pricing = model.get("pricing", {})

            # A model is free only if prompt AND completion both cost $0.
            # Missing or empty pricing defaults to "1" so it counts as paid.
            prompt_price = float(pricing.get("prompt", "1") or "1")
            completion_price = float(pricing.get("completion", "1") or "1")
            if prompt_price != 0 or completion_price != 0:
                continue

            # Skip vision, embedding, code-specific, etc. models by keyword.
            model_lower = model_id.lower()
            if any(skip in model_lower for skip in SKIP_KEYWORDS):
                continue

            # Only keep models from the curated provider list.
            provider = model_id.split("/")[0] if "/" in model_id else ""
            if provider not in PREFERRED_PROVIDERS:
                continue

            # `provider` is guaranteed to be in PREFERRED_PROVIDERS here
            # (the continue above filtered everything else), so index()
            # cannot raise — the original `else 99` branch was dead code.
            free_models.append({
                "id": model_id,
                "context_length": model.get("context_length", 0),
                "provider_priority": PREFERRED_PROVIDERS.index(provider)
            })

        # Best providers first; within a provider, largest context first.
        free_models.sort(key=lambda x: (x["provider_priority"], -x["context_length"]))

        # Take one model per provider to ensure diversity.
        seen_providers = set()
        selected_models = []
        for model in free_models:
            provider = model["id"].split("/")[0]
            if provider not in seen_providers:
                selected_models.append(model["id"])
                seen_providers.add(provider)
                if len(selected_models) >= max_models:
                    break

        if selected_models:
            logger.info(f"Selected {len(selected_models)} free models: {selected_models}")
            _cached_free_models = selected_models
            _cache_timestamp = now
            return _cached_free_models
        else:
            logger.warning("No suitable free models found, using fallback list")
            return FALLBACK_MODELS

    except Exception as e:
        # Network, HTTP-status, JSON, or price-parsing errors all degrade
        # to the static fallback list rather than crashing the caller.
        logger.warning(f"Failed to fetch models from OpenRouter: {e}. Using fallback list.")
        return FALLBACK_MODELS

138
  class LLMService:
139
  """
140
  Service for analyzing text similarity metrics using LLMs and rule-based methods.
 
148
  api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
149
  """
150
  self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
 
151
  self.temperature = DEFAULT_TEMPERATURE
152
  self.top_p = DEFAULT_TOP_P
153
+ # Models are fetched dynamically when needed
154
+ self._models = None
155
+
156
+ @property
157
+ def models(self) -> list:
158
+ """Lazily fetch and cache available free models."""
159
+ if self._models is None:
160
+ self._models = fetch_free_models()
161
+ return self._models
162
 
163
  def analyze_similarity(
164
  self,