Spaces:

neural-thinker
/

cidadao.ai-backend

Paused

anderson-ufrj committed on Sep 25

Commit

44eae1d

1 Parent(s): 8273ba9

fix(ml): implement lazy initialization for MLTrainingPipeline

- Replace global instance with lazy initialization function
- Fix settings.get() to use getattr() for Pydantic settings
- Update all imports to use get_training_pipeline() function
- Prevents initialization errors during module import

Files changed (3) hide show

src/ml/__init__.py +0 -1
src/ml/ab_testing.py +6 -4
src/ml/training_pipeline.py +10 -8

src/ml/__init__.py CHANGED Viewed

@@ -9,7 +9,6 @@ This module provides machine learning capabilities including:
 from src.ml.training_pipeline import (
     MLTrainingPipeline,
-    training_pipeline,
     get_training_pipeline
 )

 from src.ml.training_pipeline import (
     MLTrainingPipeline,
     get_training_pipeline
 )

src/ml/ab_testing.py CHANGED Viewed

@@ -16,7 +16,7 @@ from scipy import stats
 from src.core import get_logger
 from src.core.cache import get_redis_client
-from src.ml.training_pipeline import training_pipeline
 logger = get_logger(__name__)
@@ -94,8 +94,9 @@ class ABTestFramework:
             raise ValueError("Traffic split must sum to 1.0")
         # Load models to verify they exist
-        await training_pipeline.load_model(*model_a)
-        await training_pipeline.load_model(*model_b)
         test_config = {
             "test_id": f"ab_test_{test_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
@@ -450,7 +451,8 @@ class ABTestFramework:
         # Promote winning model
         model_info = test_config[winner]
-        success = await training_pipeline.promote_model(
             model_info["model_id"],
             model_info["version"],
             "production"

 from src.core import get_logger
 from src.core.cache import get_redis_client
+from src.ml.training_pipeline import get_training_pipeline
 logger = get_logger(__name__)
             raise ValueError("Traffic split must sum to 1.0")
         # Load models to verify they exist
+        pipeline = get_training_pipeline()
+        await pipeline.load_model(*model_a)
+        await pipeline.load_model(*model_b)
         test_config = {
             "test_id": f"ab_test_{test_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
         # Promote winning model
         model_info = test_config[winner]
+        pipeline = get_training_pipeline()
+        success = await pipeline.promote_model(
             model_info["model_id"],
             model_info["version"],
             "production"

src/ml/training_pipeline.py CHANGED Viewed

@@ -52,7 +52,7 @@ class MLTrainingPipeline:
         """Initialize the training pipeline."""
         self.experiment_name = experiment_name
         self.mlflow_client = None
-        self.models_dir = Path(settings.get("ML_MODELS_DIR", "./models"))
         self.models_dir.mkdir(exist_ok=True)
         # Supported algorithms
@@ -70,7 +70,7 @@ class MLTrainingPipeline:
     def _initialize_mlflow(self):
         """Initialize MLflow tracking."""
         try:
-            mlflow.set_tracking_uri(settings.get("MLFLOW_TRACKING_URI", "file:./mlruns"))
             mlflow.set_experiment(self.experiment_name)
             self.mlflow_client = MlflowClient()
             logger.info(f"MLflow initialized with experiment: {self.experiment_name}")
@@ -514,10 +514,12 @@ class MLTrainingPipeline:
         return count
-# Global training pipeline instance
-training_pipeline = MLTrainingPipeline()
-async def get_training_pipeline() -> MLTrainingPipeline:
-    """Get the global training pipeline instance."""
-    return training_pipeline

         """Initialize the training pipeline."""
         self.experiment_name = experiment_name
         self.mlflow_client = None
+        self.models_dir = Path(getattr(settings, "ML_MODELS_DIR", "./models"))
         self.models_dir.mkdir(exist_ok=True)
         # Supported algorithms
     def _initialize_mlflow(self):
         """Initialize MLflow tracking."""
         try:
+            mlflow.set_tracking_uri(getattr(settings, "MLFLOW_TRACKING_URI", "file:./mlruns"))
             mlflow.set_experiment(self.experiment_name)
             self.mlflow_client = MlflowClient()
             logger.info(f"MLflow initialized with experiment: {self.experiment_name}")
         return count
+# Global training pipeline instance (lazy initialization)
+_training_pipeline = None
+def get_training_pipeline() -> MLTrainingPipeline:
+    """Get or create the global training pipeline instance."""
+    global _training_pipeline
+    if _training_pipeline is None:
+        _training_pipeline = MLTrainingPipeline()
+    return _training_pipeline