tlmk22
/

QuefrencyGuardian

@@ -1,6 +1,5 @@
 import os
 import struct
-import pickle
 from pathlib import Path
 from typing import Literal, Union
@@ -9,6 +8,7 @@ import torch
 import lightgbm as lgb
 import torchaudio
 from huggingface_hub import hf_hub_download
 from sklearn.exceptions import NotFittedError
 from torch import Tensor
 from torchaudio.transforms import Spectrogram
@@ -366,7 +366,7 @@ class FastModelHuggingFace:
     Methods
     -------
     from_pretrained(repo_id: str, revision: str = "main",
-                    pipeline_file_name: str = "pipeline.pkl",
                     model_file_name: str = "model_lightgbm.txt") -> "FastModelHuggingFace":
         Loads the FastModel pipeline and model from the Hugging Face Hub.
     predict(input_data: Union[str, "HuggingFaceDataset"], get_proba: bool = False) -> np.ndarray:
@@ -392,7 +392,7 @@ class FastModelHuggingFace:
         cls,
         repo_id: str,
         revision: str = "main",
-        pipeline_file_name: str = "pipeline.pkl",
         model_file_name: str = "model_lightgbm.txt",
     ) -> "FastModelHuggingFace":
         """
@@ -405,7 +405,7 @@ class FastModelHuggingFace:
         revision : str, optional
             The specific revision of the repository to use (default is "main").
         pipeline_file_name : str, optional
-            The filename of the serialized pipeline (default is "pipeline.pkl").
         model_file_name : str, optional
             The filename of the LightGBM model (default is "model_lightgbm.txt").
@@ -424,8 +424,7 @@ class FastModelHuggingFace:
         if not os.path.exists(pipeline_path):
             raise FileNotFoundError(f"Pipeline file {pipeline_path} is missing or corrupted.")
-        with open(pipeline_path, "rb") as f:
-            pipeline = pickle.load(f)
         if not os.path.exists(model_lgbm_path):
             raise FileNotFoundError(
@@ -512,10 +511,10 @@ def save_pipeline(
     lgbm_file_name : str, optional
         The filename for saving the LightGBM model (default is "model_lightgbm.txt").
     pipeline_file_name : str, optional
-        The filename for saving the pipeline (default is "pipeline.pkl").
     """
     lgbm_file_name = lgbm_file_name or "model_lightgbm.txt"
-    pipeline_file_name = pipeline_file_name or "pipeline.pkl"
     lightgbm_path = Path(path) / lgbm_file_name
     if model_class_instance.model:
@@ -523,8 +522,7 @@ def save_pipeline(
         model_class_instance.model.save_model(model_class_instance.model_file_name)
     pipeline_path = Path(path) / pipeline_file_name
-    with open(pipeline_path, "wb") as f:
-        pickle.dump(model_class_instance, f)
 def load_pipeline(
@@ -540,7 +538,7 @@ def load_pipeline(
     lgbm_file_name : str, optional
         The filename for the LightGBM model (default is "model_fast_model.txt").
     pipeline_file_name : str, optional
-        The filename for the pipeline (default is "pipeline.pkl").
     Returns
     -------
@@ -553,13 +551,13 @@ def load_pipeline(
         If either the LightGBM model or pipeline file is not found.
     """
     lgbm_file_name = lgbm_file_name or "model_fast_model.txt"
-    pipeline_file_name = pipeline_file_name or "pipeline.pkl"
     pipeline_path = Path(path) / pipeline_file_name
     if not pipeline_path.exists():
         raise FileNotFoundError(f"Pipeline file {pipeline_path} not found.")
-    with open(pipeline_path, "rb") as f:
-        model_class_instance = pickle.load(f)
     lightgbm_path = Path(path) / lgbm_file_name
     if not lightgbm_path.exists():

 import os
 import struct
 from pathlib import Path
 from typing import Literal, Union
 import lightgbm as lgb
 import torchaudio
 from huggingface_hub import hf_hub_download
+from joblib import dump, load
 from sklearn.exceptions import NotFittedError
 from torch import Tensor
 from torchaudio.transforms import Spectrogram
     Methods
     -------
     from_pretrained(repo_id: str, revision: str = "main",
+                    pipeline_file_name: str = "pipeline.joblib",
                     model_file_name: str = "model_lightgbm.txt") -> "FastModelHuggingFace":
         Loads the FastModel pipeline and model from the Hugging Face Hub.
     predict(input_data: Union[str, "HuggingFaceDataset"], get_proba: bool = False) -> np.ndarray:
         cls,
         repo_id: str,
         revision: str = "main",
+        pipeline_file_name: str = "pipeline.joblib",
         model_file_name: str = "model_lightgbm.txt",
     ) -> "FastModelHuggingFace":
         """
         revision : str, optional
             The specific revision of the repository to use (default is "main").
         pipeline_file_name : str, optional
+            The filename of the serialized pipeline (default is "pipeline.joblib").
         model_file_name : str, optional
             The filename of the LightGBM model (default is "model_lightgbm.txt").
         if not os.path.exists(pipeline_path):
             raise FileNotFoundError(f"Pipeline file {pipeline_path} is missing or corrupted.")
+        pipeline = load(pipeline_path)
         if not os.path.exists(model_lgbm_path):
             raise FileNotFoundError(
     lgbm_file_name : str, optional
         The filename for saving the LightGBM model (default is "model_lightgbm.txt").
     pipeline_file_name : str, optional
+        The filename for saving the pipeline (default is "pipeline.joblib").
     """
     lgbm_file_name = lgbm_file_name or "model_lightgbm.txt"
+    pipeline_file_name = pipeline_file_name or "pipeline.joblib"
     lightgbm_path = Path(path) / lgbm_file_name
     if model_class_instance.model:
         model_class_instance.model.save_model(model_class_instance.model_file_name)
     pipeline_path = Path(path) / pipeline_file_name
+    dump(model_class_instance, pipeline_path)
 def load_pipeline(
     lgbm_file_name : str, optional
         The filename for the LightGBM model (default is "model_fast_model.txt").
     pipeline_file_name : str, optional
+        The filename for the pipeline (default is "pipeline.joblib").
     Returns
     -------
         If either the LightGBM model or pipeline file is not found.
     """
     lgbm_file_name = lgbm_file_name or "model_fast_model.txt"
+    pipeline_file_name = pipeline_file_name or "pipeline.joblib"
     pipeline_path = Path(path) / pipeline_file_name
     if not pipeline_path.exists():
         raise FileNotFoundError(f"Pipeline file {pipeline_path} not found.")
+    model_class_instance = load(pipeline_path)
     lightgbm_path = Path(path) / lgbm_file_name
     if not lightgbm_path.exists():

pipeline.pkl → pipeline.joblib RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:faff5f8ba72a4be0fe89fb5951c53fe70b5ccd53170e81c141a27691361b9155
 size 834053

 version https://git-lfs.github.com/spec/v1
+oid sha256:04a292b51ec618f28089ee0933b30e6623f3abff3e282aafaca15b13c402a847
 size 834053