AlvaroMros committed on
Commit
3994c21
·
1 Parent(s): 2181ea6

Add model update pipeline and improve CLI options

Browse files

Introduces a new 'update' pipeline option to retrain models only if new data is detected or models are missing, without running evaluation. Refactors model instantiation into a shared MODELS_TO_RUN list, updates CLI help and documentation, and implements the update_models_if_new_data method in PredictionPipeline.

Files changed (4) hide show
  1. README.md +6 -2
  2. src/main.py +15 -2
  3. src/predict/main.py +14 -1
  4. src/predict/pipeline.py +21 -0
README.md CHANGED
@@ -50,13 +50,17 @@ python -m src.main --pipeline predict --force-retrain
50
  ```
51
  Always retrains all models from scratch with the latest data. This is useful when the way models are trained changes.
52
 
53
- ### 3. Complete Pipeline
54
-
55
  ```bash
56
  python -m src.main --pipeline all --scrape-mode update
57
  ```
58
  Runs scraping (update mode), analysis, and prediction in sequence.
59
 
 
 
 
 
 
60
  ## Model Performance
61
 
62
  The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).
 
50
  ```
51
  Always retrains all models from scratch with the latest data. This is useful when the way models are trained changes.
52
 
53
+ #### 2.1 Complete Pipeline
 
54
  ```bash
55
  python -m src.main --pipeline all --scrape-mode update
56
  ```
57
  Runs scraping (update mode), analysis, and prediction in sequence.
58
 
59
+ #### 2.2 Update Models
60
+ ```bash
61
+ python -m src.main --pipeline update
62
+ ```
63
+
64
  ## Model Performance
65
 
66
  The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).
src/main.py CHANGED
@@ -15,8 +15,8 @@ def main():
15
  '--pipeline',
16
  type=str,
17
  default='scrape',
18
- choices=['scrape', 'analysis', 'predict', 'all'],
19
- help="Pipeline to run: 'scrape', 'analysis', 'predict', or 'all'"
20
  )
21
  parser.add_argument(
22
  '--scrape-mode',
@@ -69,6 +69,19 @@ def main():
69
  print("\n=== Running ELO Analysis ===")
70
  from analysis.elo import main as elo_main
71
  elo_main()
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  if args.pipeline in ['predict', 'all']:
74
  print("\n=== Running Prediction Pipeline ===")
 
15
  '--pipeline',
16
  type=str,
17
  default='scrape',
18
+ choices=['scrape', 'analysis', 'predict', 'update', 'all'],
19
+ help="Pipeline to run: 'scrape', 'analysis', 'predict', 'update', or 'all'"
20
  )
21
  parser.add_argument(
22
  '--scrape-mode',
 
69
  print("\n=== Running ELO Analysis ===")
70
  from analysis.elo import main as elo_main
71
  elo_main()
72
+
73
+ if args.pipeline == 'update':
74
+ print("\n=== Running Model Update Pipeline ===")
75
+ try:
76
+ from src.predict.main import MODELS_TO_RUN
77
+ from src.predict.pipeline import PredictionPipeline
78
+ except ImportError:
79
+ print("Fatal: Could not import prediction modules.")
80
+ print("Please ensure your project structure and python path are correct.")
81
+ return
82
+
83
+ pipeline = PredictionPipeline(models=MODELS_TO_RUN)
84
+ pipeline.update_models_if_new_data()
85
 
86
  if args.pipeline in ['predict', 'all']:
87
  print("\n=== Running Prediction Pipeline ===")
src/predict/main.py CHANGED
@@ -12,6 +12,19 @@ from src.predict.models import (
12
  LGBMModel
13
  )
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def main():
16
  """
17
  Main entry point to run the prediction pipeline.
@@ -70,7 +83,7 @@ def main():
70
  # --- End of Model Definition ---
71
 
72
  pipeline = PredictionPipeline(
73
- models=models_to_run,
74
  use_existing_models=use_existing_models,
75
  force_retrain=force_retrain
76
  )
 
12
  LGBMModel
13
  )
14
 
15
+ # --- Define Models to Run ---
16
+ # Instantiate all the models you want to evaluate here.
17
+ MODELS_TO_RUN = [
18
+ EloBaselineModel(),
19
+ LogisticRegressionModel(),
20
+ XGBoostModel(),
21
+ SVCModel(),
22
+ RandomForestModel(),
23
+ BernoulliNBModel(),
24
+ LGBMModel(),
25
+ ]
26
+ # --- End of Model Definition ---
27
+
28
  def main():
29
  """
30
  Main entry point to run the prediction pipeline.
 
83
  # --- End of Model Definition ---
84
 
85
  pipeline = PredictionPipeline(
86
+ models=MODELS_TO_RUN,
87
  use_existing_models=use_existing_models,
88
  force_retrain=force_retrain
89
  )
src/predict/pipeline.py CHANGED
@@ -232,6 +232,27 @@ class PredictionPipeline:
232
  if should_retrain:
233
  self._train_and_save_models()
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  def _train_and_save_models(self):
236
  """Trains all models on the full dataset and saves them."""
237
  print("\n\n--- Training and Saving All Models on Full Dataset ---")
 
232
  if should_retrain:
233
  self._train_and_save_models()
234
 
235
+ def update_models_if_new_data(self):
236
+ """
237
+ Checks for new data and retrains/saves all models on the full dataset if needed.
238
+ This does not run any evaluation.
239
+ """
240
+ print("\n--- Checking for Model Updates ---")
241
+
242
+ # Check if any model files are missing or invalid
243
+ missing_models = [m for m in self.models if not self._model_exists(m)]
244
+ has_new_data = self._has_new_data_since_last_training()
245
+
246
+ if missing_models:
247
+ missing_names = [m.__class__.__name__ for m in missing_models]
248
+ print(f"Missing or invalid model files found for: {missing_names}.")
249
+ self._train_and_save_models()
250
+ elif has_new_data:
251
+ print("New data detected, retraining all models...")
252
+ self._train_and_save_models()
253
+ else:
254
+ print("No new data detected. Models are already up-to-date.")
255
+
256
  def _train_and_save_models(self):
257
  """Trains all models on the full dataset and saves them."""
258
  print("\n\n--- Training and Saving All Models on Full Dataset ---")