zapatacc committed on
Commit
70638b1
·
1 Parent(s): 3ade10f

Add initial Dockerfile, API implementation, and requirements

Browse files
Files changed (4) hide show
  1. Dockerfile +17 -0
  2. README.md +1 -0
  3. api.py +82 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the NYC-taxi prediction API (FastAPI + uvicorn on port 8000).
FROM python:3.13-slim

WORKDIR /code

# Copy requirements first so the dependency layer is cached across code changes.
COPY ./requirements.txt /code/requirements.txt

# procps gives basic process tooling inside the container; the symlink sets the
# container timezone to America/Mexico_City. Clean apt lists and skip pip's
# cache to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends procps && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --no-cache-dir -U pip && \
    rm /etc/localtime && \
    ln -s /usr/share/zoneinfo/America/Mexico_City /etc/localtime && \
    pip install --no-cache-dir -r ./requirements.txt

COPY ./api.py /code/

EXPOSE 8000

CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: blue
5
  colorTo: blue
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
5
  colorTo: blue
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import pickle

import mlflow
import pandas as pd
import xgboost as xgb  # imported so the pickled XGBoost model can be deserialized
from dotenv import load_dotenv
from fastapi import FastAPI
from mlflow import MlflowClient
from pydantic import BaseModel

# Load environment variables (Databricks credentials) from the .env file,
# overriding anything already set in the process environment.
load_dotenv(override=True)

mlflow.set_tracking_uri("databricks")
client = MlflowClient()

EXPERIMENT_NAME = "/Users/[email protected]/nyc-taxi-experiment-prefect"

# Select the best run in the experiment: lowest RMSE first.
_runs = mlflow.search_runs(
    order_by=["metrics.rmse ASC"],
    output_format="list",
    experiment_names=[EXPERIMENT_NAME],
)
if not _runs:
    # Fail fast with a clear message instead of an opaque IndexError.
    raise RuntimeError(f"No runs found for experiment {EXPERIMENT_NAME!r}")

run_id = _runs[0].info.run_id

# Fetch the fitted DictVectorizer artifact saved alongside the best run
# and unpickle it for request-time feature transformation.
client.download_artifacts(
    run_id=run_id,
    path="preprocessor",
    dst_path=".",
)

with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

# Load the registered model version currently tagged with the "champion" alias.
model_name = "workspace.default.nyc-taxi-model-prefect"
alias = "champion"

model_uri = f"models:/{model_name}@{alias}"

champion_model = mlflow.pyfunc.load_model(
    model_uri=model_uri
)
44
+
45
def preprocess(input_data):
    """Turn one request payload into the feature DataFrame the model expects.

    Combines pickup/dropoff zone ids into the 'PU_DO' feature, vectorizes the
    record with the fitted DictVectorizer, and densifies the result with the
    vectorizer's column names.
    """
    record = {
        'PU_DO': f"{input_data.PULocationID}_{input_data.DOLocationID}",
        'trip_distance': input_data.trip_distance,
    }
    matrix = dv.transform([record])

    # The feature-name accessor was renamed between sklearn versions.
    try:
        feature_names = dv.get_feature_names_out()
    except AttributeError:
        feature_names = dv.get_feature_names()

    return pd.DataFrame(matrix.toarray(), columns=feature_names)
63
+
64
def predict(input_data):
    """Preprocess one request and score it with the champion model."""
    features = preprocess(input_data)
    return champion_model.predict(features)
69
+
70
app = FastAPI()


class InputData(BaseModel):
    """Request body for the prediction endpoint."""

    # Zone ids arrive as strings so they can be joined into the PU_DO feature.
    PULocationID: str
    DOLocationID: str
    trip_distance: float
77
+
78
@app.post("/api/v1/predict")
def predict_endpoint(input_data: InputData):
    """Score a single trip and return the prediction as JSON."""
    value = predict(input_data)[0]
    print(f"Prediction: {value}")
    return {"prediction": float(value)}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.121.0
2
+ mlflow==3.4.0
3
+ mlflow_skinny==3.4.0
4
+ mlflow_tracing==3.4.0
5
+ pandas==2.3.3
6
+ pydantic==2.12.4
7
+ python-dotenv==1.2.1
8
+ # scikit-learn is required to unpickle and use the DictVectorizer preprocessor
9
+ scikit-learn
10
+ # uvicorn is the ASGI server launched by the Dockerfile CMD
11
+ uvicorn
12
+ xgboost==3.1.1