Spaces:

Mihkelmj
/

utrecht-pollution-prediction

Sleeping

App Files Community

elisaklunder committed on Oct 30, 2024

Commit

5a87062

2 Parent(s): a4294eb 14fc71f

Merge branch 'caching'

Browse files

Files changed (13) hide show

.gitignore +3 -1
app.py +3 -1
past_pollution_data.csv +9 -7
past_weather_data.csv +4 -2
pollution_data.csv +2 -1
scalers/feature_scaler_NO2.joblib +0 -3
scalers/feature_scaler_O3.joblib +0 -3
scalers/target_scaler_NO2.joblib +0 -3
scalers/target_scaler_O3.joblib +0 -3
src/data_api_calls.py +0 -2
src/features_pipeline.py +11 -1
src/predict.py +41 -9
weather_data.csv +5 -3

.gitignore CHANGED Viewed

@@ -1,4 +1,6 @@
 .venv/
 .env
 __pycache__/
-*.pyc

 .venv/
 .env
 __pycache__/
+*.pyc
+*.joblib
+scalers/

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import plotly.graph_objects as go
 import streamlit as st
 from src.helper_functions import custom_metric_box, pollution_box
-from src.predict import get_data_and_predictions
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard ",
@@ -15,6 +15,8 @@ st.set_page_config(
 alt.themes.enable("dark")
 week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
 today = week_data.iloc[-1]

 import streamlit as st
 from src.helper_functions import custom_metric_box, pollution_box
+from src.predict import get_data_and_predictions, update_data_and_predictions
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard ",
 alt.themes.enable("dark")
+update_data_and_predictions()
 week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
 today = week_data.iloc[-1]

past_pollution_data.csv CHANGED Viewed

@@ -4,13 +4,15 @@ date,NO2,O3
 2023-10-20,17.233055555555563,18.7156
 2023-10-21,15.023599999999991,22.04
 2023-10-22,8.723378378378372,48.33439999999999
-2023-10-23,20.634266666666676,15.586000000000002
-2023-10-24,15.1156,24.628085106382972
 2023-10-25,22.88567567567568,27.117599999999992
-2023-10-26,21.531756756756756,13.3216
 2023-10-27,23.07226666666666,16.15416666666666
 2023-10-28,24.89121621621622,24.59040816326531
-2023-10-29,9.724428571428572,51.5252
-2023-10-30,11.202054794520548,52.820600000000006
-2023-10-31,17.494666666666664,44.45854166666667
-2023-11-01,21.58809523809524,29.20631578947368

 2023-10-20,17.233055555555563,18.7156
 2023-10-21,15.023599999999991,22.04
 2023-10-22,8.723378378378372,48.33439999999999
+2023-10-23,20.63426666666668,15.586000000000002
+2023-10-24,15.1156,24.62808510638297
 2023-10-25,22.88567567567568,27.117599999999992
+2023-10-26,21.53175675675676,13.3216
 2023-10-27,23.07226666666666,16.15416666666666
 2023-10-28,24.89121621621622,24.59040816326531
+2023-10-29,9.724428571428573,51.525200000000005
+2023-10-30,11.20205479452055,52.820600000000006
+2023-10-31,17.494666666666667,44.458541666666655
+2023-11-01,21.588095238095235,29.20631578947369
+2023-11-02,9.745714285714286,48.39760869565216
+2023-11-03,7.163243243243242,61.421599999999984

past_weather_data.csv CHANGED Viewed

@@ -11,6 +11,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
 2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
 2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
 2023-10-28,11.4,88.6,3.0,18.4,994.4,29.3,48.5
-2023-10-29,13.0,82.2,9.5,31.7,991.5,38.8,35.4
-2023-10-30,11.2,90.4,13.0,18.4,997.5,28.8,27.0
 2023-10-31,11,93.7,18.6,18,1000.7,17.9,29.8

 2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
 2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
 2023-10-28,11.4,88.6,3.0,18.4,994.4,29.3,48.5
+2023-10-29,13,82.2,9.5,31.7,991.5,38.8,35.4
+2023-10-30,11.2,90.4,13,18.4,997.5,28.8,27
 2023-10-31,11,93.7,18.6,18,1000.7,17.9,29.8
+2023-11-01,12.4,88.5,4.9,25.9,997.8,32.6,31.5
+2023-11-02,11,80,8.7,46.4,976.4,33.6,21.5

pollution_data.csv CHANGED Viewed

@@ -11,4 +11,5 @@ date,NO2,O3
 2024-10-26,24.46423484380123,18.70331123489324
 2024-10-27,27.53722134983982,20.80809239842384
 2024-10-28,23.337567567567568,26.82861788617886
-2024-10-29,16.53533209586906,23.282548876050039

 2024-10-26,24.46423484380123,18.70331123489324
 2024-10-27,27.53722134983982,20.80809239842384
 2024-10-28,23.337567567567568,26.82861788617886
+2024-10-29,16.53533209586906,23.282548876050039
+2024-10-30,22.26162162162162,18.034435483870976

scalers/feature_scaler_NO2.joblib DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4d2731707963125bbb452df55c91920c62eb745c7e78c0a79bdf7fab173b3369
-size 5791

scalers/feature_scaler_O3.joblib DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d6e30fc2c7ce7a00bc1b8db08e5f4ffa110136a796f55a68beedb479b07189f7
-size 5023

scalers/target_scaler_NO2.joblib DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:255a0d1dd1d8673ce03e838e9fc1a7df4dab1248ca70f6cb73b66aea83ed6316
-size 1023

scalers/target_scaler_O3.joblib DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2ad485897b59228f1c1efd8c76cc2fa771d10efd379297f163ceba32dbacbab6
-size 1023

src/data_api_calls.py CHANGED Viewed

@@ -114,8 +114,6 @@ def update_pollution_data():
 def get_combined_data():
-    update_weather_data()
-    update_pollution_data()
     weather_df = pd.read_csv(WEATHER_DATA_FILE)


114
115
116	def get_combined_data():


117
118	weather_df = pd.read_csv(WEATHER_DATA_FILE)
119

src/features_pipeline.py CHANGED Viewed

@@ -1,12 +1,19 @@
 import warnings
 import joblib
 import numpy as np
 import pandas as pd
 from src.past_data_api_calls import get_past_combined_data
 warnings.filterwarnings("ignore")
 def create_features(
     data,
@@ -91,7 +98,10 @@ def create_features(
     x = data[feature_cols]
     # Scale
-    feature_scaler = joblib.load(f"scalers/feature_scaler_{target_particle}.joblib")
     X_scaled = feature_scaler.transform(x)
     # Convert scaled data back to DataFrame for consistency

+import os
 import warnings
 import joblib
 import numpy as np
 import pandas as pd
+from dotenv import load_dotenv
+from huggingface_hub import hf_hub_download, login
 from src.past_data_api_calls import get_past_combined_data
 warnings.filterwarnings("ignore")
+load_dotenv()
+login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
 def create_features(
     data,
     x = data[feature_cols]
     # Scale
+    repo_id = f"elisaklunder/Utrecht-{target_particle}-Forecasting-Model"
+    file_name = f"feature_scaler_{target_particle}.joblib"
+    path = hf_hub_download(repo_id=repo_id, filename=file_name)
+    feature_scaler = joblib.load(path)
     X_scaled = feature_scaler.transform(x)
     # Convert scaled data back to DataFrame for consistency

src/predict.py CHANGED Viewed

@@ -1,19 +1,23 @@
 import os
-from datetime import date, timedelta
 import joblib
 import pandas as pd
 from dotenv import load_dotenv
 from huggingface_hub import hf_hub_download, login
-from src.data_api_calls import get_combined_data
 from src.features_pipeline import create_features
-def load_model(particle):
-    load_dotenv()
-    login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
     repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
     if particle == "O3":
         file_name = "O3_svr_model.pkl"
@@ -29,13 +33,19 @@ def run_model(particle, data):
     input_data = create_features(data=data, target_particle=particle)
     model = load_model(particle)
     prediction = model.predict(input_data)
-    target_scaler = joblib.load(f"scalers/target_scaler_{particle}.joblib")
     prediction = target_scaler.inverse_transform(prediction)
     return prediction
-def get_data_and_predictions():
-    PREDICTIONS_FILE = "predictions_history.csv"
     week_data = get_combined_data()
@@ -63,6 +73,8 @@ def get_data_and_predictions():
     predictions_df = pd.DataFrame(prediction_data)
     if os.path.exists(PREDICTIONS_FILE):
         existing_data = pd.read_csv(PREDICTIONS_FILE)
         # Filter out predictions made today to avoid duplicates
@@ -70,8 +82,28 @@ def get_data_and_predictions():
             ~(existing_data["date_predicted"] == str(date.today()))
         ]
         combined_data = pd.concat([existing_data, predictions_df])
     else:
         combined_data = predictions_df
     combined_data.to_csv(PREDICTIONS_FILE, index=False)
-    return week_data, o3_predictions, no2_predictions

 import os
+from datetime import date, datetime, timedelta
 import joblib
 import pandas as pd
 from dotenv import load_dotenv
 from huggingface_hub import hf_hub_download, login
+from src.data_api_calls import (
+    get_combined_data,
+    update_pollution_data,
+    update_weather_data,
+)
 from src.features_pipeline import create_features
+load_dotenv()
+login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
+def load_model(particle):
     repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
     if particle == "O3":
         file_name = "O3_svr_model.pkl"
     input_data = create_features(data=data, target_particle=particle)
     model = load_model(particle)
     prediction = model.predict(input_data)
+    repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
+    file_name = f"target_scaler_{particle}.joblib"
+    path = hf_hub_download(repo_id=repo_id, filename=file_name)
+    target_scaler = joblib.load(path)
     prediction = target_scaler.inverse_transform(prediction)
     return prediction
+def update_data_and_predictions():
+    update_weather_data()
+    update_pollution_data()
     week_data = get_combined_data()
     predictions_df = pd.DataFrame(prediction_data)
+    PREDICTIONS_FILE = "predictions_history.csv"
     if os.path.exists(PREDICTIONS_FILE):
         existing_data = pd.read_csv(PREDICTIONS_FILE)
         # Filter out predictions made today to avoid duplicates
             ~(existing_data["date_predicted"] == str(date.today()))
         ]
         combined_data = pd.concat([existing_data, predictions_df])
+        combined_data.drop_duplicates()
     else:
         combined_data = predictions_df
     combined_data.to_csv(PREDICTIONS_FILE, index=False)
+def get_data_and_predictions():
+    week_data = get_combined_data()
+    PREDICTIONS_FILE = "predictions_history.csv"
+    data = pd.read_csv(PREDICTIONS_FILE)
+    today = datetime.today().strftime("%Y-%m-%d")
+    today_predictions = data[(data["date_predicted"] == today)]
+    # Extract predictions for O3 and NO2
+    o3_predictions = today_predictions[today_predictions["pollutant"] == "O3"][
+        "prediction_value"
+    ].values
+    no2_predictions = today_predictions[today_predictions["pollutant"] == "NO2"][
+        "prediction_value"
+    ].values
+    return week_data, [o3_predictions], [no2_predictions]

weather_data.csv CHANGED Viewed

@@ -8,6 +8,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
 2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
 2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
 2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
-2024-10-26,13.7,91.5,0.0,11.9,1016.3,23.3,8.0
-2024-10-27,13.2,87.1,0.1,20.5,1019.4,10.4,28.6
-2024-10-28,12.4,91.8,1.1,31.7,1021.8,12.8,27.3

 2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
 2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
 2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
+2024-10-26,13.7,91.5,0.0,11.9,1016.3,23.3,8
+2024-10-27,12,90.9,0.1,13.7,1019.6,23.7,28.6
+2024-10-28,10.5,92.8,1.7,19.4,1022.7,24,28.2
+2024-10-29,13.8,95.9,0.2,20.5,1023.1,8.1,16
+2024-10-30,12.7,92.5,0.6,9.4,1027.5,13.7,32.9