Merge branch 'caching'
Browse files
- .gitignore +3 -1
- app.py +3 -1
- past_pollution_data.csv +9 -7
- past_weather_data.csv +4 -2
- pollution_data.csv +2 -1
- scalers/feature_scaler_NO2.joblib +0 -3
- scalers/feature_scaler_O3.joblib +0 -3
- scalers/target_scaler_NO2.joblib +0 -3
- scalers/target_scaler_O3.joblib +0 -3
- src/data_api_calls.py +0 -2
- src/features_pipeline.py +11 -1
- src/predict.py +41 -9
- weather_data.csv +5 -3
.gitignore
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
.venv/
|
| 2 |
.env
|
| 3 |
__pycache__/
|
| 4 |
-
*.pyc
|
|
|
|
|
|
|
|
|
| 1 |
.venv/
|
| 2 |
.env
|
| 3 |
__pycache__/
|
| 4 |
+
*.pyc
|
| 5 |
+
*.joblib
|
| 6 |
+
scalers/
|
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import plotly.graph_objects as go
|
|
| 4 |
import streamlit as st
|
| 5 |
|
| 6 |
from src.helper_functions import custom_metric_box, pollution_box
|
| 7 |
-
from src.predict import get_data_and_predictions
|
| 8 |
|
| 9 |
st.set_page_config(
|
| 10 |
page_title="Utrecht Pollution Dashboard ",
|
|
@@ -15,6 +15,8 @@ st.set_page_config(
|
|
| 15 |
|
| 16 |
alt.themes.enable("dark")
|
| 17 |
|
|
|
|
|
|
|
| 18 |
week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
|
| 19 |
|
| 20 |
today = week_data.iloc[-1]
|
|
|
|
| 4 |
import streamlit as st
|
| 5 |
|
| 6 |
from src.helper_functions import custom_metric_box, pollution_box
|
| 7 |
+
from src.predict import get_data_and_predictions, update_data_and_predictions
|
| 8 |
|
| 9 |
st.set_page_config(
|
| 10 |
page_title="Utrecht Pollution Dashboard ",
|
|
|
|
| 15 |
|
| 16 |
alt.themes.enable("dark")
|
| 17 |
|
| 18 |
+
update_data_and_predictions()
|
| 19 |
+
|
| 20 |
week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
|
| 21 |
|
| 22 |
today = week_data.iloc[-1]
|
past_pollution_data.csv
CHANGED
|
@@ -4,13 +4,15 @@ date,NO2,O3
|
|
| 4 |
2023-10-20,17.233055555555563,18.7156
|
| 5 |
2023-10-21,15.023599999999991,22.04
|
| 6 |
2023-10-22,8.723378378378372,48.33439999999999
|
| 7 |
-
2023-10-23,20.
|
| 8 |
-
2023-10-24,15.1156,24.
|
| 9 |
2023-10-25,22.88567567567568,27.117599999999992
|
| 10 |
-
2023-10-26,21.
|
| 11 |
2023-10-27,23.07226666666666,16.15416666666666
|
| 12 |
2023-10-28,24.89121621621622,24.59040816326531
|
| 13 |
-
2023-10-29,9.
|
| 14 |
-
2023-10-30,11.
|
| 15 |
-
2023-10-31,17.
|
| 16 |
-
2023-11-01,21.
|
|
|
|
|
|
|
|
|
| 4 |
2023-10-20,17.233055555555563,18.7156
|
| 5 |
2023-10-21,15.023599999999991,22.04
|
| 6 |
2023-10-22,8.723378378378372,48.33439999999999
|
| 7 |
+
2023-10-23,20.63426666666668,15.586000000000002
|
| 8 |
+
2023-10-24,15.1156,24.62808510638297
|
| 9 |
2023-10-25,22.88567567567568,27.117599999999992
|
| 10 |
+
2023-10-26,21.53175675675676,13.3216
|
| 11 |
2023-10-27,23.07226666666666,16.15416666666666
|
| 12 |
2023-10-28,24.89121621621622,24.59040816326531
|
| 13 |
+
2023-10-29,9.724428571428573,51.525200000000005
|
| 14 |
+
2023-10-30,11.20205479452055,52.820600000000006
|
| 15 |
+
2023-10-31,17.494666666666667,44.458541666666655
|
| 16 |
+
2023-11-01,21.588095238095235,29.20631578947369
|
| 17 |
+
2023-11-02,9.745714285714286,48.39760869565216
|
| 18 |
+
2023-11-03,7.163243243243242,61.421599999999984
|
past_weather_data.csv
CHANGED
|
@@ -11,6 +11,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
|
|
| 11 |
2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
|
| 12 |
2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
|
| 13 |
2023-10-28,11.4,88.6,3.0,18.4,994.4,29.3,48.5
|
| 14 |
-
2023-10-29,13
|
| 15 |
-
2023-10-30,11.2,90.4,13
|
| 16 |
2023-10-31,11,93.7,18.6,18,1000.7,17.9,29.8
|
|
|
|
|
|
|
|
|
| 11 |
2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
|
| 12 |
2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
|
| 13 |
2023-10-28,11.4,88.6,3.0,18.4,994.4,29.3,48.5
|
| 14 |
+
2023-10-29,13,82.2,9.5,31.7,991.5,38.8,35.4
|
| 15 |
+
2023-10-30,11.2,90.4,13,18.4,997.5,28.8,27
|
| 16 |
2023-10-31,11,93.7,18.6,18,1000.7,17.9,29.8
|
| 17 |
+
2023-11-01,12.4,88.5,4.9,25.9,997.8,32.6,31.5
|
| 18 |
+
2023-11-02,11,80,8.7,46.4,976.4,33.6,21.5
|
pollution_data.csv
CHANGED
|
@@ -11,4 +11,5 @@ date,NO2,O3
|
|
| 11 |
2024-10-26,24.46423484380123,18.70331123489324
|
| 12 |
2024-10-27,27.53722134983982,20.80809239842384
|
| 13 |
2024-10-28,23.337567567567568,26.82861788617886
|
| 14 |
-
2024-10-29,16.53533209586906,23.282548876050039
|
|
|
|
|
|
| 11 |
2024-10-26,24.46423484380123,18.70331123489324
|
| 12 |
2024-10-27,27.53722134983982,20.80809239842384
|
| 13 |
2024-10-28,23.337567567567568,26.82861788617886
|
| 14 |
+
2024-10-29,16.53533209586906,23.282548876050039
|
| 15 |
+
2024-10-30,22.26162162162162,18.034435483870976
|
scalers/feature_scaler_NO2.joblib
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:4d2731707963125bbb452df55c91920c62eb745c7e78c0a79bdf7fab173b3369
|
| 3 |
-
size 5791
|
|
|
|
|
|
|
|
|
|
|
|
scalers/feature_scaler_O3.joblib
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:d6e30fc2c7ce7a00bc1b8db08e5f4ffa110136a796f55a68beedb479b07189f7
|
| 3 |
-
size 5023
|
|
|
|
|
|
|
|
|
|
|
|
scalers/target_scaler_NO2.joblib
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:255a0d1dd1d8673ce03e838e9fc1a7df4dab1248ca70f6cb73b66aea83ed6316
|
| 3 |
-
size 1023
|
|
|
|
|
|
|
|
|
|
|
|
scalers/target_scaler_O3.joblib
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:2ad485897b59228f1c1efd8c76cc2fa771d10efd379297f163ceba32dbacbab6
|
| 3 |
-
size 1023
|
|
|
|
|
|
|
|
|
|
|
|
src/data_api_calls.py
CHANGED
|
@@ -114,8 +114,6 @@ def update_pollution_data():
|
|
| 114 |
|
| 115 |
|
| 116 |
def get_combined_data():
|
| 117 |
-
update_weather_data()
|
| 118 |
-
update_pollution_data()
|
| 119 |
|
| 120 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
| 121 |
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def get_combined_data():
|
|
|
|
|
|
|
| 117 |
|
| 118 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
| 119 |
|
src/features_pipeline.py
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
|
|
| 1 |
import warnings
|
| 2 |
|
| 3 |
import joblib
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
| 6 |
from src.past_data_api_calls import get_past_combined_data
|
| 7 |
|
| 8 |
warnings.filterwarnings("ignore")
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
def create_features(
|
| 12 |
data,
|
|
@@ -91,7 +98,10 @@ def create_features(
|
|
| 91 |
x = data[feature_cols]
|
| 92 |
|
| 93 |
# Scale
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
| 95 |
X_scaled = feature_scaler.transform(x)
|
| 96 |
|
| 97 |
# Convert scaled data back to DataFrame for consistency
|
|
|
|
| 1 |
+
import os
|
| 2 |
import warnings
|
| 3 |
|
| 4 |
import joblib
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from huggingface_hub import hf_hub_download, login
|
| 9 |
+
|
| 10 |
from src.past_data_api_calls import get_past_combined_data
|
| 11 |
|
| 12 |
warnings.filterwarnings("ignore")
|
| 13 |
|
| 14 |
+
load_dotenv()
|
| 15 |
+
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
| 16 |
+
|
| 17 |
|
| 18 |
def create_features(
|
| 19 |
data,
|
|
|
|
| 98 |
x = data[feature_cols]
|
| 99 |
|
| 100 |
# Scale
|
| 101 |
+
repo_id = f"elisaklunder/Utrecht-{target_particle}-Forecasting-Model"
|
| 102 |
+
file_name = f"feature_scaler_{target_particle}.joblib"
|
| 103 |
+
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
| 104 |
+
feature_scaler = joblib.load(path)
|
| 105 |
X_scaled = feature_scaler.transform(x)
|
| 106 |
|
| 107 |
# Convert scaled data back to DataFrame for consistency
|
src/predict.py
CHANGED
|
@@ -1,19 +1,23 @@
|
|
| 1 |
import os
|
| 2 |
-
from datetime import date, timedelta
|
| 3 |
|
| 4 |
import joblib
|
| 5 |
import pandas as pd
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
from huggingface_hub import hf_hub_download, login
|
| 8 |
|
| 9 |
-
from src.data_api_calls import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from src.features_pipeline import create_features
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
def load_model(particle):
|
| 14 |
-
load_dotenv()
|
| 15 |
-
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
| 16 |
|
|
|
|
| 17 |
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
| 18 |
if particle == "O3":
|
| 19 |
file_name = "O3_svr_model.pkl"
|
|
@@ -29,13 +33,19 @@ def run_model(particle, data):
|
|
| 29 |
input_data = create_features(data=data, target_particle=particle)
|
| 30 |
model = load_model(particle)
|
| 31 |
prediction = model.predict(input_data)
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
prediction = target_scaler.inverse_transform(prediction)
|
|
|
|
| 34 |
return prediction
|
| 35 |
|
| 36 |
|
| 37 |
-
def
|
| 38 |
-
|
|
|
|
| 39 |
|
| 40 |
week_data = get_combined_data()
|
| 41 |
|
|
@@ -63,6 +73,8 @@ def get_data_and_predictions():
|
|
| 63 |
|
| 64 |
predictions_df = pd.DataFrame(prediction_data)
|
| 65 |
|
|
|
|
|
|
|
| 66 |
if os.path.exists(PREDICTIONS_FILE):
|
| 67 |
existing_data = pd.read_csv(PREDICTIONS_FILE)
|
| 68 |
# Filter out predictions made today to avoid duplicates
|
|
@@ -70,8 +82,28 @@ def get_data_and_predictions():
|
|
| 70 |
~(existing_data["date_predicted"] == str(date.today()))
|
| 71 |
]
|
| 72 |
combined_data = pd.concat([existing_data, predictions_df])
|
|
|
|
| 73 |
else:
|
| 74 |
combined_data = predictions_df
|
| 75 |
|
| 76 |
combined_data.to_csv(PREDICTIONS_FILE, index=False)
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
from datetime import date, datetime, timedelta
|
| 3 |
|
| 4 |
import joblib
|
| 5 |
import pandas as pd
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
from huggingface_hub import hf_hub_download, login
|
| 8 |
|
| 9 |
+
from src.data_api_calls import (
|
| 10 |
+
get_combined_data,
|
| 11 |
+
update_pollution_data,
|
| 12 |
+
update_weather_data,
|
| 13 |
+
)
|
| 14 |
from src.features_pipeline import create_features
|
| 15 |
|
| 16 |
+
load_dotenv()
|
| 17 |
+
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
def load_model(particle):
|
| 21 |
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
| 22 |
if particle == "O3":
|
| 23 |
file_name = "O3_svr_model.pkl"
|
|
|
|
| 33 |
input_data = create_features(data=data, target_particle=particle)
|
| 34 |
model = load_model(particle)
|
| 35 |
prediction = model.predict(input_data)
|
| 36 |
+
|
| 37 |
+
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
| 38 |
+
file_name = f"target_scaler_{particle}.joblib"
|
| 39 |
+
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
| 40 |
+
target_scaler = joblib.load(path)
|
| 41 |
prediction = target_scaler.inverse_transform(prediction)
|
| 42 |
+
|
| 43 |
return prediction
|
| 44 |
|
| 45 |
|
| 46 |
+
def update_data_and_predictions():
|
| 47 |
+
update_weather_data()
|
| 48 |
+
update_pollution_data()
|
| 49 |
|
| 50 |
week_data = get_combined_data()
|
| 51 |
|
|
|
|
| 73 |
|
| 74 |
predictions_df = pd.DataFrame(prediction_data)
|
| 75 |
|
| 76 |
+
PREDICTIONS_FILE = "predictions_history.csv"
|
| 77 |
+
|
| 78 |
if os.path.exists(PREDICTIONS_FILE):
|
| 79 |
existing_data = pd.read_csv(PREDICTIONS_FILE)
|
| 80 |
# Filter out predictions made today to avoid duplicates
|
|
|
|
| 82 |
~(existing_data["date_predicted"] == str(date.today()))
|
| 83 |
]
|
| 84 |
combined_data = pd.concat([existing_data, predictions_df])
|
| 85 |
+
combined_data.drop_duplicates()
|
| 86 |
else:
|
| 87 |
combined_data = predictions_df
|
| 88 |
|
| 89 |
combined_data.to_csv(PREDICTIONS_FILE, index=False)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def get_data_and_predictions():
    """Load the combined weekly data and the predictions stored for today.

    Reads ``predictions_history.csv``, keeps the rows whose
    ``date_predicted`` column equals today's date formatted as
    ``YYYY-MM-DD``, and splits those rows by the ``pollutant`` column.

    Returns:
        A tuple ``(week_data, [o3_values], [no2_values])``. ``week_data`` is
        whatever ``get_combined_data()`` returns; each of the two lists wraps
        a single array holding the ``prediction_value`` entries for that
        pollutant.
    """
    PREDICTIONS_FILE = "predictions_history.csv"

    week_data = get_combined_data()
    history = pd.read_csv(PREDICTIONS_FILE)

    # Keep only the rows recorded for today's date.
    todays_stamp = datetime.today().strftime("%Y-%m-%d")
    todays_rows = history[history["date_predicted"] == todays_stamp]

    def _values_for(pollutant):
        # Prediction values of a single pollutant among today's rows.
        is_pollutant = todays_rows["pollutant"] == pollutant
        return todays_rows[is_pollutant]["prediction_value"].values

    return week_data, [_values_for("O3")], [_values_for("NO2")]
|
weather_data.csv
CHANGED
|
@@ -8,6 +8,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
|
|
| 8 |
2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
|
| 9 |
2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
|
| 10 |
2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
|
| 11 |
-
2024-10-26,13.7,91.5,0.0,11.9,1016.3,23.3,8
|
| 12 |
-
2024-10-27,
|
| 13 |
-
2024-10-28,
|
|
|
|
|
|
|
|
|
| 8 |
2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
|
| 9 |
2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
|
| 10 |
2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
|
| 11 |
+
2024-10-26,13.7,91.5,0.0,11.9,1016.3,23.3,8
|
| 12 |
+
2024-10-27,12,90.9,0.1,13.7,1019.6,23.7,28.6
|
| 13 |
+
2024-10-28,10.5,92.8,1.7,19.4,1022.7,24,28.2
|
| 14 |
+
2024-10-29,13.8,95.9,0.2,20.5,1023.1,8.1,16
|
| 15 |
+
2024-10-30,12.7,92.5,0.6,9.4,1027.5,13.7,32.9
|