Spaces:

AlvaroMros
/

ufc-predictor

Sleeping

AlvaroMros committed on Jul 15

Commit

3994c21

1 Parent(s): 2181ea6

Add model update pipeline and improve CLI options

Introduces a new 'update' pipeline option to retrain models only if new data is detected or models are missing, without running evaluation. Refactors model instantiation into a shared MODELS_TO_RUN list, updates CLI help and documentation, and implements the update_models_if_new_data method in PredictionPipeline.

Files changed (4) hide show

README.md +6 -2
src/main.py +15 -2
src/predict/main.py +14 -1
src/predict/pipeline.py +21 -0

README.md CHANGED Viewed

@@ -50,13 +50,17 @@ python -m src.main --pipeline predict --force-retrain
 ```
 Always retrains all models from scratch with the latest data. This is useful when the way models are trained changes.
-### 3. Complete Pipeline
 ```bash
 python -m src.main --pipeline all --scrape-mode update
 ```
 Runs scraping (update mode), analysis, and prediction in sequence.
 ## Model Performance
 The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).

 ```
 Always retrains all models from scratch with the latest data. This is useful when the way models are trained changes.
+#### 2.1 Complete Pipeline
 ```bash
 python -m src.main --pipeline all --scrape-mode update
 ```
 Runs scraping (update mode), analysis, and prediction in sequence.
+#### 2.2 Update Models
+```bash
+python -m src.main --pipeline update
+```
 ## Model Performance
 The system tests on the latest UFC event for realistic accuracy scores (typically 50-70% for fight prediction).

src/main.py CHANGED Viewed

@@ -15,8 +15,8 @@ def main():
         '--pipeline',
         type=str,
         default='scrape',
-        choices=['scrape', 'analysis', 'predict', 'all'],
-        help="Pipeline to run: 'scrape', 'analysis', 'predict', or 'all'"
     )
     parser.add_argument(
         '--scrape-mode',
@@ -69,6 +69,19 @@ def main():
         print("\n=== Running ELO Analysis ===")
         from analysis.elo import main as elo_main
         elo_main()
     if args.pipeline in ['predict', 'all']:
         print("\n=== Running Prediction Pipeline ===")

         '--pipeline',
         type=str,
         default='scrape',
+        choices=['scrape', 'analysis', 'predict', 'update', 'all'],
+        help="Pipeline to run: 'scrape', 'analysis', 'predict', 'update', or 'all'"
     )
     parser.add_argument(
         '--scrape-mode',
         print("\n=== Running ELO Analysis ===")
         from analysis.elo import main as elo_main
         elo_main()
+    if args.pipeline == 'update':
+        print("\n=== Running Model Update Pipeline ===")
+        try:
+            from src.predict.main import MODELS_TO_RUN
+            from src.predict.pipeline import PredictionPipeline
+        except ImportError:
+            print("Fatal: Could not import prediction modules.")
+            print("Please ensure your project structure and python path are correct.")
+            return
+        pipeline = PredictionPipeline(models=MODELS_TO_RUN)
+        pipeline.update_models_if_new_data()
     if args.pipeline in ['predict', 'all']:
         print("\n=== Running Prediction Pipeline ===")

src/predict/main.py CHANGED Viewed

@@ -12,6 +12,19 @@ from src.predict.models import (
     LGBMModel
 )
 def main():
     """
     Main entry point to run the prediction pipeline.
@@ -70,7 +83,7 @@ def main():
     # --- End of Model Definition ---
     pipeline = PredictionPipeline(
-        models=models_to_run,
         use_existing_models=use_existing_models,
         force_retrain=force_retrain
     )

     LGBMModel
 )
+# --- Define Models to Run ---
+# Instantiate all the models you want to evaluate here.
+MODELS_TO_RUN = [
+    EloBaselineModel(),
+    LogisticRegressionModel(),
+    XGBoostModel(),
+    SVCModel(),
+    RandomForestModel(),
+    BernoulliNBModel(),
+    LGBMModel(),
+]
+# --- End of Model Definition ---
 def main():
     """
     Main entry point to run the prediction pipeline.
     # --- End of Model Definition ---
     pipeline = PredictionPipeline(
+        models=MODELS_TO_RUN,
         use_existing_models=use_existing_models,
         force_retrain=force_retrain
     )

src/predict/pipeline.py CHANGED Viewed

@@ -232,6 +232,27 @@ class PredictionPipeline:
         if should_retrain:
             self._train_and_save_models()
     def _train_and_save_models(self):
         """Trains all models on the full dataset and saves them."""
         print("\n\n--- Training and Saving All Models on Full Dataset ---")

         if should_retrain:
             self._train_and_save_models()
+    def update_models_if_new_data(self):
+        """
+        Checks for new data and retrains/saves all models on the full dataset if needed.
+        This does not run any evaluation.
+        """
+        print("\n--- Checking for Model Updates ---")
+        # Check if any model files are missing or invalid
+        missing_models = [m for m in self.models if not self._model_exists(m)]
+        has_new_data = self._has_new_data_since_last_training()
+        if missing_models:
+            missing_names = [m.__class__.__name__ for m in missing_models]
+            print(f"Missing or invalid model files found for: {missing_names}.")
+            self._train_and_save_models()
+        elif has_new_data:
+            print("New data detected, retraining all models...")
+            self._train_and_save_models()
+        else:
+            print("No new data detected. Models are already up-to-date.")
     def _train_and_save_models(self):
         """Trains all models on the full dataset and saves them."""
         print("\n\n--- Training and Saving All Models on Full Dataset ---")