Spaces:
Sleeping
Sleeping
Commit
·
3994c21
1
Parent(s):
2181ea6
Add model update pipeline and improve CLI options
Browse files
Introduces a new 'update' pipeline option to retrain models only if new data is detected or models are missing, without running evaluation. Refactors model instantiation into a shared MODELS_TO_RUN list, updates CLI help and documentation, and implements the update_models_if_new_data method in PredictionPipeline.
- README.md +6 -2
- src/main.py +15 -2
- src/predict/main.py +14 -1
- src/predict/pipeline.py +21 -0
README.md
CHANGED
@@ -50,13 +50,17 @@ python -m src.main --pipeline predict --force-retrain
|
|
50 |
```
|
51 |
Always retrains all models from scratch with latest data. This is useful for when the way training models changes
|
52 |
|
53 |
-
|
54 |
-
|
55 |
```bash
|
56 |
python -m src.main --pipeline all --scrape-mode update
|
57 |
```
|
58 |
Runs scraping (update mode), analysis, and prediction in sequence.
|
59 |
|
|
|
|
|
|
|
|
|
|
|
60 |
## Model Performance
|
61 |
|
62 |
The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).
|
|
|
50 |
```
|
51 |
Always retrains all models from scratch with latest data. This is useful for when the way training models changes
|
52 |
|
53 |
+
#### 2.1 Complete Pipeline
|
|
|
54 |
```bash
|
55 |
python -m src.main --pipeline all --scrape-mode update
|
56 |
```
|
57 |
Runs scraping (update mode), analysis, and prediction in sequence.
|
58 |
|
59 |
+
#### 2.2 Update Models
|
60 |
+
```bash
|
61 |
+
python -m src.main --pipeline update
|
62 |
+
```
|
63 |
+
|
64 |
## Model Performance
|
65 |
|
66 |
The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).
|
src/main.py
CHANGED
@@ -15,8 +15,8 @@ def main():
|
|
15 |
'--pipeline',
|
16 |
type=str,
|
17 |
default='scrape',
|
18 |
-
choices=['scrape', 'analysis', 'predict', 'all'],
|
19 |
-
help="Pipeline to run: 'scrape', 'analysis', 'predict', or 'all'"
|
20 |
)
|
21 |
parser.add_argument(
|
22 |
'--scrape-mode',
|
@@ -69,6 +69,19 @@ def main():
|
|
69 |
print("\n=== Running ELO Analysis ===")
|
70 |
from analysis.elo import main as elo_main
|
71 |
elo_main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
if args.pipeline in ['predict', 'all']:
|
74 |
print("\n=== Running Prediction Pipeline ===")
|
|
|
15 |
'--pipeline',
|
16 |
type=str,
|
17 |
default='scrape',
|
18 |
+
choices=['scrape', 'analysis', 'predict', 'update', 'all'],
|
19 |
+
help="Pipeline to run: 'scrape', 'analysis', 'predict', 'update', or 'all'"
|
20 |
)
|
21 |
parser.add_argument(
|
22 |
'--scrape-mode',
|
|
|
69 |
print("\n=== Running ELO Analysis ===")
|
70 |
from analysis.elo import main as elo_main
|
71 |
elo_main()
|
72 |
+
|
73 |
+
if args.pipeline == 'update':
|
74 |
+
print("\n=== Running Model Update Pipeline ===")
|
75 |
+
try:
|
76 |
+
from src.predict.main import MODELS_TO_RUN
|
77 |
+
from src.predict.pipeline import PredictionPipeline
|
78 |
+
except ImportError:
|
79 |
+
print("Fatal: Could not import prediction modules.")
|
80 |
+
print("Please ensure your project structure and python path are correct.")
|
81 |
+
return
|
82 |
+
|
83 |
+
pipeline = PredictionPipeline(models=MODELS_TO_RUN)
|
84 |
+
pipeline.update_models_if_new_data()
|
85 |
|
86 |
if args.pipeline in ['predict', 'all']:
|
87 |
print("\n=== Running Prediction Pipeline ===")
|
src/predict/main.py
CHANGED
@@ -12,6 +12,19 @@ from src.predict.models import (
|
|
12 |
LGBMModel
|
13 |
)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def main():
|
16 |
"""
|
17 |
Main entry point to run the prediction pipeline.
|
@@ -70,7 +83,7 @@ def main():
|
|
70 |
# --- End of Model Definition ---
|
71 |
|
72 |
pipeline = PredictionPipeline(
|
73 |
-
models=
|
74 |
use_existing_models=use_existing_models,
|
75 |
force_retrain=force_retrain
|
76 |
)
|
|
|
12 |
LGBMModel
|
13 |
)
|
14 |
|
15 |
+
# --- Define Models to Run ---
|
16 |
+
# Instantiate all the models you want to evaluate here.
|
17 |
+
MODELS_TO_RUN = [
|
18 |
+
EloBaselineModel(),
|
19 |
+
LogisticRegressionModel(),
|
20 |
+
XGBoostModel(),
|
21 |
+
SVCModel(),
|
22 |
+
RandomForestModel(),
|
23 |
+
BernoulliNBModel(),
|
24 |
+
LGBMModel(),
|
25 |
+
]
|
26 |
+
# --- End of Model Definition ---
|
27 |
+
|
28 |
def main():
|
29 |
"""
|
30 |
Main entry point to run the prediction pipeline.
|
|
|
83 |
# --- End of Model Definition ---
|
84 |
|
85 |
pipeline = PredictionPipeline(
|
86 |
+
models=MODELS_TO_RUN,
|
87 |
use_existing_models=use_existing_models,
|
88 |
force_retrain=force_retrain
|
89 |
)
|
src/predict/pipeline.py
CHANGED
@@ -232,6 +232,27 @@ class PredictionPipeline:
|
|
232 |
if should_retrain:
|
233 |
self._train_and_save_models()
|
234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
def _train_and_save_models(self):
|
236 |
"""Trains all models on the full dataset and saves them."""
|
237 |
print("\n\n--- Training and Saving All Models on Full Dataset ---")
|
|
|
232 |
if should_retrain:
|
233 |
self._train_and_save_models()
|
234 |
|
235 |
+
def update_models_if_new_data(self):
|
236 |
+
"""
|
237 |
+
Checks for new data and retrains/saves all models on the full dataset if needed.
|
238 |
+
This does not run any evaluation.
|
239 |
+
"""
|
240 |
+
print("\n--- Checking for Model Updates ---")
|
241 |
+
|
242 |
+
# Check if any model files are missing or invalid
|
243 |
+
missing_models = [m for m in self.models if not self._model_exists(m)]
|
244 |
+
has_new_data = self._has_new_data_since_last_training()
|
245 |
+
|
246 |
+
if missing_models:
|
247 |
+
missing_names = [m.__class__.__name__ for m in missing_models]
|
248 |
+
print(f"Missing or invalid model files found for: {missing_names}.")
|
249 |
+
self._train_and_save_models()
|
250 |
+
elif has_new_data:
|
251 |
+
print("New data detected, retraining all models...")
|
252 |
+
self._train_and_save_models()
|
253 |
+
else:
|
254 |
+
print("No new data detected. Models are already up-to-date.")
|
255 |
+
|
256 |
def _train_and_save_models(self):
|
257 |
"""Trains all models on the full dataset and saves them."""
|
258 |
print("\n\n--- Training and Saving All Models on Full Dataset ---")
|