import pickle
import mlflow
from fastapi import FastAPI
from pydantic import BaseModel
from mlflow import MlflowClient
from dotenv import load_dotenv
import os
import pandas as pd
import xgboost as xgb

load_dotenv(override=True)  # Load the variables from the .env file

mlflow.set_tracking_uri("databricks")

client = MlflowClient()

EXPERIMENT_NAME = "/Users/zapatacc@gmail.com/nyc-taxi-experiment-prefect"

# Pick the best run of the experiment (lowest RMSE)
run_ = mlflow.search_runs(
    order_by=["metrics.rmse ASC"],
    output_format="list",
    experiment_names=[EXPERIMENT_NAME],
)[0]

run_id = run_.info.run_id
run_uri = f"runs:/{run_id}/preprocessor"

# Download the fitted preprocessor (DictVectorizer) logged as an artifact of that run
client.download_artifacts(
    run_id=run_id,
    path="preprocessor",
    dst_path=".",
)

with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

# Load the registered model version tagged with the "champion" alias
model_name = "workspace.default.nyc-taxi-model-prefect"
alias = "champion"

model_uri = f"models:/{model_name}@{alias}"

champion_model = mlflow.pyfunc.load_model(
    model_uri=model_uri
)


def preprocess(input_data):
    input_dict = {
        "PU_DO": input_data.PULocationID + "_" + input_data.DOLocationID,
        "trip_distance": input_data.trip_distance,
    }
    X = dv.transform([input_dict])
    # Feature-name API depends on the scikit-learn version
    try:
        cols = dv.get_feature_names_out()
    except AttributeError:
        cols = dv.get_feature_names()
    # Wrap the vectorized features in a DataFrame with named columns for the pyfunc model
    X_df = pd.DataFrame(X.toarray(), columns=cols)
    return X_df


def predict(input_data):
    X_val = preprocess(input_data)
    return champion_model.predict(X_val)


app = FastAPI()


class InputData(BaseModel):
    PULocationID: str
    DOLocationID: str
    trip_distance: float


@app.post("/api/v1/predict")
def predict_endpoint(input_data: InputData):
    result = predict(input_data)[0]
    print(f"Prediction: {result}")
    return {"prediction": float(result)}
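
# --- Local run / smoke test (illustrative sketch, not part of the service above) ---
# A minimal way to serve and exercise the endpoint, assuming this file is saved as
# main.py and uvicorn is installed. The host, port, and sample payload values are
# assumptions for illustration only:
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   curl -X POST http://localhost:8000/api/v1/predict \
#        -H "Content-Type: application/json" \
#        -d '{"PULocationID": "43", "DOLocationID": "151", "trip_distance": 1.5}'
#
if __name__ == "__main__":
    # Optional: run the app directly with `python main.py` instead of the uvicorn CLI.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)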