# FastAPI inference service for the MLflow-registered NYC taxi model.
import pickle
import mlflow
from fastapi import FastAPI
from pydantic import BaseModel
from mlflow import MlflowClient
from dotenv import load_dotenv
import os
import pandas as pd
import xgboost as xgb
# --- One-time service setup (runs at import time): locate the best run,
# fetch its fitted preprocessor, and load the registered champion model. ---
load_dotenv(override=True)  # Load environment variables from the .env file
mlflow.set_tracking_uri("databricks")
client = MlflowClient()

EXPERIMENT_NAME = "/Users/[email protected]/nyc-taxi-experiment-prefect"

# Best run = first result when ordering the experiment's runs by RMSE ascending.
run_ = mlflow.search_runs(order_by=['metrics.rmse ASC'],
                          output_format="list",
                          experiment_names=[EXPERIMENT_NAME]
                          )[0]
run_id = run_.info.run_id
run_uri = f"runs:/{run_id}/preprocessor"  # NOTE(review): built but never used below

# Download the 'preprocessor' artifact of the best run into ./preprocessor/.
client.download_artifacts(
    run_id=run_id,
    path='preprocessor',
    dst_path='.'
)
# NOTE: unpickling is acceptable here only because the artifact comes from our
# own MLflow run — never pickle.load untrusted data.
with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

# Load the model registered under the "champion" alias.
model_name = "workspace.default.nyc-taxi-model-prefect"
alias = "champion"
model_uri = f"models:/{model_name}@{alias}"
champion_model = mlflow.pyfunc.load_model(
    model_uri=model_uri
)
def preprocess(input_data):
    """Turn a single ride request into the model's one-row feature frame.

    Parameters
    ----------
    input_data : object exposing ``PULocationID`` (str), ``DOLocationID``
        (str) and ``trip_distance`` (float) attributes — e.g. ``InputData``.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are the vectorizer's feature names.
    """
    input_dict = {
        # Combined pickup/dropoff id — mirrors the training-time feature.
        'PU_DO': input_data.PULocationID + "_" + input_data.DOLocationID,
        'trip_distance': input_data.trip_distance,
    }
    X = dv.transform([input_dict])
    # Feature-name accessor depends on the sklearn version that fitted dv.
    try:
        cols = dv.get_feature_names_out()
    except AttributeError:
        cols = dv.get_feature_names()
    # Fix: dv.transform returns a dense ndarray when the vectorizer was
    # created with sparse=False; only densify when the output is sparse.
    dense = X.toarray() if hasattr(X, "toarray") else X
    return pd.DataFrame(dense, columns=cols)
def predict(input_data):
    """Run the champion model on a single preprocessed ride request."""
    features = preprocess(input_data)
    prediction = champion_model.predict(features)
    return prediction
app = FastAPI()


class InputData(BaseModel):
    # Request schema for the prediction endpoint.
    PULocationID: str    # pickup zone id (string; concatenated into PU_DO)
    DOLocationID: str    # dropoff zone id (string; concatenated into PU_DO)
    trip_distance: float  # trip distance — units as in the training data
@app.post("/api/v1/predict")
def predict_endpoint(input_data: InputData):
    """Predict for one ride request; responds with {"prediction": <float>}.

    Fix: removed a stray trailing ``|`` artifact after the return statement
    that made the module unparseable.
    """
    result = predict(input_data)[0]
    print(f"Prediction: {result}")
    # Cast to a builtin float so the JSON body never carries a numpy scalar.
    return {"prediction": float(result)}