Spaces:

Mihkelmj
/

utrecht-pollution-prediction

Sleeping

App Files Files Community

elisaklunder committed on Oct 22, 2024

Commit

472271b

2 Parent(s): eeaf86d 5c6dd58

Merge branch 'elisa'

Browse files

Files changed (11) hide show

.gitignore +4 -0
README.md +0 -1
app.py +50 -41
daily_api__pollution.py +0 -0
requirements.txt +2 -1
scalers/target_scaler_NO2.joblib +3 -0
scalers/target_scaler_O3.joblib +3 -0
src/daily_api__pollution.py +161 -0
data_loading.py → src/data_loading.py +0 -0
helper_functions.py → src/helper_functions.py +0 -18
src/models_loading.py +37 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.venv/
+.env
+__pycache__/
+*.pyc

README.md CHANGED Viewed

@@ -11,4 +11,3 @@ short_description: 'Demo: Model to predict O3 and NO2 concentrations in Utrecht'
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
- hhhrhehheehehehe


11	---
12
13	Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,35 +1,32 @@
-import time
 import altair as alt
-import joblib
-import numpy as np
 import pandas as pd
-import streamlit as st
-from sklearn.linear_model import LinearRegression
-import matplotlib.pyplot as plt
 import plotly.graph_objects as go
-from helper_functions import custom_metric_box, pollution_box, run_model
 from data_api_calls import get_data
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard",
     page_icon="🏂��🌱",
     layout="wide",
-    initial_sidebar_state="expanded")
 alt.themes.enable("dark")
-prediction = run_model()  # Assuming you have a function run_model()
 get_data()
 data = pd.read_csv("dataset.csv")
 # App Title
-st.title("Utrecht Pollution Dashboard 🌱")
-col1, col2 = st.columns((1,1))
 # Create a 3-column layout
 with col1:
-    st.subheader('Current Weather')
     col1, col2, col3 = st.columns(3)
     # First column
@@ -47,10 +44,10 @@ with col1:
         custom_metric_box(label="Solar Radiation", value="200 W/m²", delta="-20 W/m²")
         custom_metric_box(label="Wind Speed", value="15 km/h", delta="-2 km/h")
-    st.subheader('Current Pollution Levels')
-    col1, col2 = st.columns((1,1))
     # Display the prediction
-    #st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
     with col1:
         pollution_box(label="O<sub>3</sub>", value="37 µg/m³", delta="+2 µg/m³")
     with col2:
@@ -58,7 +55,9 @@ with col1:
 # Sample data (replace with your actual data)
 dates_past = pd.date_range(end=pd.Timestamp.today(), periods=7).to_list()
-dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
 # O3 and NO2 values for the past 7 days
 o3_past_values = [30, 32, 34, 33, 31, 35, 36]
@@ -74,61 +73,71 @@ o3_values = o3_past_values + o3_future_values
 no2_values = no2_past_values + no2_future_values
 # Create a DataFrame
-df = pd.DataFrame({
-    'Date': dates,
-    'O3': o3_values,
-    'NO2': no2_values
-})
-st.subheader('O3 and NO2 Prediction')
 # Create two columns for two separate graphs
 subcol1, subcol2 = st.columns(2)
 # Plot O3 in the first subcolumn
 with subcol1:
     fig_o3 = go.Figure()
-    fig_o3.add_trace(go.Scatter(x=df['Date'], y=df['O3'],
-                                mode='lines+markers',
-                                name='O3',
-                                line=dict(color='rgb(0, 191, 255)', width=4)))  # Bright blue
     # Add a vertical line for predictions (today's date)
     fig_o3.add_shape(
         dict(
             type="line",
-            x0=pd.Timestamp.today(), x1=pd.Timestamp.today(),
-            y0=min(o3_values), y1=max(o3_values),
             line=dict(color="White", width=3, dash="dash"),
         )
     )
     fig_o3.update_layout(
-        plot_bgcolor='rgba(0, 0, 0, 0)',  # Transparent background
-        paper_bgcolor='rgba(0, 0, 0, 0)',  # Transparent paper background
         yaxis_title="O3 Concentration (µg/m³)",
         font=dict(size=14),
-        hovermode="x unified"
     )
     st.plotly_chart(fig_o3)
 # Plot NO2 in the second subcolumn
 with subcol2:
     fig_no2 = go.Figure()
-    fig_no2.add_trace(go.Scatter(x=df['Date'], y=df['NO2'],
-                                    mode='lines+markers',
-                                    name='NO2',
-                                    line=dict(color='rgb(255, 20, 147)', width=4)))  # Bright pink
     # Add a vertical line for predictions (today's date)
     fig_no2.add_shape(
         dict(
             type="line",
-            x0=pd.Timestamp.today(), x1=pd.Timestamp.today(),
-            y0=min(no2_values), y1=max(no2_values),
             line=dict(color="White", width=3, dash="dash"),
         )
     )
     fig_no2.update_layout(
-        plot_bgcolor='rgba(0, 0, 0, 0)',  # Transparent background
-        paper_bgcolor='rgba(0, 0, 0, 0)',  # Transparent paper background
         yaxis_title="NO2 Concentration (µg/m³)",
         font=dict(size=14),
-        hovermode="x unified"
     )
-    st.plotly_chart(fig_no2)

 import altair as alt
 import pandas as pd
 import plotly.graph_objects as go
+import streamlit as st
+from src.helper_functions import custom_metric_box, pollution_box
+from src.models_loading import run_model
 from data_api_calls import get_data
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard",
     page_icon="🏂��🌱",
     layout="wide",
+    initial_sidebar_state="expanded",
+)
 alt.themes.enable("dark")
+test_predictions = run_model("O3")
 get_data()
 data = pd.read_csv("dataset.csv")
 # App Title
+st.title("Utrecht Pollution Dashboard🌱")
+col1, col2 = st.columns((1, 1))
 # Create a 3-column layout
 with col1:
+    st.subheader("Current Weather")
     col1, col2, col3 = st.columns(3)
     # First column
         custom_metric_box(label="Solar Radiation", value="200 W/m²", delta="-20 W/m²")
         custom_metric_box(label="Wind Speed", value="15 km/h", delta="-2 km/h")
+    st.subheader("Current Pollution Levels")
+    col1, col2 = st.columns((1, 1))
     # Display the prediction
+    # st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
     with col1:
         pollution_box(label="O<sub>3</sub>", value="37 µg/m³", delta="+2 µg/m³")
     with col2:
 # Sample data (replace with your actual data)
 dates_past = pd.date_range(end=pd.Timestamp.today(), periods=7).to_list()
+dates_future = pd.date_range(
+    start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3
+).to_list()
 # O3 and NO2 values for the past 7 days
 o3_past_values = [30, 32, 34, 33, 31, 35, 36]
 no2_values = no2_past_values + no2_future_values
 # Create a DataFrame
+df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
+st.subheader("O3 and NO2 Prediction")
 # Create two columns for two separate graphs
 subcol1, subcol2 = st.columns(2)
 # Plot O3 in the first subcolumn
 with subcol1:
     fig_o3 = go.Figure()
+    fig_o3.add_trace(
+        go.Scatter(
+            x=df["Date"],
+            y=df["O3"],
+            mode="lines+markers",
+            name="O3",
+            line=dict(color="rgb(0, 191, 255)", width=4),
+        )
+    )  # Bright blue
     # Add a vertical line for predictions (today's date)
     fig_o3.add_shape(
         dict(
             type="line",
+            x0=pd.Timestamp.today(),
+            x1=pd.Timestamp.today(),
+            y0=min(o3_values),
+            y1=max(o3_values),
             line=dict(color="White", width=3, dash="dash"),
         )
     )
     fig_o3.update_layout(
+        plot_bgcolor="rgba(0, 0, 0, 0)",  # Transparent background
+        paper_bgcolor="rgba(0, 0, 0, 0)",  # Transparent paper background
         yaxis_title="O3 Concentration (µg/m³)",
         font=dict(size=14),
+        hovermode="x unified",
     )
     st.plotly_chart(fig_o3)
 # Plot NO2 in the second subcolumn
 with subcol2:
     fig_no2 = go.Figure()
+    fig_no2.add_trace(
+        go.Scatter(
+            x=df["Date"],
+            y=df["NO2"],
+            mode="lines+markers",
+            name="NO2",
+            line=dict(color="rgb(255, 20, 147)", width=4),
+        )
+    )  # Bright pink
     # Add a vertical line for predictions (today's date)
     fig_no2.add_shape(
         dict(
             type="line",
+            x0=pd.Timestamp.today(),
+            x1=pd.Timestamp.today(),
+            y0=min(no2_values),
+            y1=max(no2_values),
             line=dict(color="White", width=3, dash="dash"),
         )
     )
     fig_no2.update_layout(
+        plot_bgcolor="rgba(0, 0, 0, 0)",  # Transparent background
+        paper_bgcolor="rgba(0, 0, 0, 0)",  # Transparent paper background
         yaxis_title="NO2 Concentration (µg/m³)",
         font=dict(size=14),
+        hovermode="x unified",
     )
+    st.plotly_chart(fig_no2)

daily_api__pollution.py ADDED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ altair
 matplotlib
 plotly
 http.client
-datetime

 matplotlib
 plotly
 http.client
+datetime
+huggingface-hub

scalers/target_scaler_NO2.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:255a0d1dd1d8673ce03e838e9fc1a7df4dab1248ca70f6cb73b66aea83ed6316
+size 1023

scalers/target_scaler_O3.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad485897b59228f1c1efd8c76cc2fa771d10efd379297f163ceba32dbacbab6
+size 1023

src/daily_api__pollution.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import http.client
+from datetime import date, timedelta
+import pandas as pd
+from io import StringIO
+import os
+import re
+import csv
+def api_call():
+    particles = ["NO2", "O3"]
+    stations = ["NL10636", "NL10639", "NL10643"]
+    all_dataframes = []
+    today = date.today().isoformat() + "T09:00:00Z"
+    yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
+    latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
+    days_today = 0
+    days_yesterday = 1
+    while(today != latest_date):
+        days_today += 1
+        days_yesterday += 1
+        for particle in particles:
+            for station in stations:
+                conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
+                payload = ''
+                headers = {}
+                conn.request("GET", f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}", payload, headers)
+                res = conn.getresponse()
+                data = res.read()
+                decoded_data = data.decode("utf-8")
+                df = pd.read_csv(StringIO(decoded_data))
+                df = df.filter(like='value')
+                all_dataframes.append(df)
+            combined_data = pd.concat(all_dataframes, ignore_index=True)
+            combined_data.to_csv(f'{particle}_{today}.csv', index=False)
+        today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
+        yesterday = (date.today() - timedelta(days_yesterday)).isoformat() + "T09:00:00Z"
+def delete_csv(csvs):
+    for csv in csvs:
+        if(os.path.exists(csv) and os.path.isfile(csv)):
+            os.remove(csv)
+def clean_values():
+    particles = ["NO2", "O3"]
+    csvs = []
+    NO2 = []
+    O3 = []
+    today = date.today().isoformat() + "T09:00:00Z"
+    yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
+    latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
+    days_today = 0
+    while(today != latest_date):
+        for particle in particles:
+            name = f'{particle}_{today}.csv'
+            csvs.append(name)
+        days_today += 1
+        today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
+    for csv_file in csvs:
+        values = []  # Reset values for each CSV file
+        # Open the CSV file and read the values
+        with open(csv_file, 'r') as file:
+            reader = csv.reader(file)
+            for row in reader:
+                for value in row:
+                    # Use regular expressions to extract numeric part
+                    cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", value)
+                    if cleaned_value:  # If we successfully extract a number
+                        values.append(float(cleaned_value[0]))  # Convert the first match to float
+        # Compute the average if the values list is not empty
+        if values:
+            avg = sum(values) / len(values)
+            if "NO2" in csv_file:
+                NO2.append(avg)
+            else:
+                O3.append(avg)
+    delete_csv(csvs)
+    return NO2, O3
+def add_columns():
+    file_path = 'weather_data.csv'
+    df = pd.read_csv(file_path)
+    df.insert(1, 'NO2', None)
+    df.insert(2, 'O3', None)
+    df.insert(10, 'weekday', None)
+    df.to_csv('combined_data.csv', index=False)
+def scale():
+    file_path = 'combined_data.csv'
+    df = pd.read_csv(file_path)
+    columns = list(df.columns)
+    columns.insert(3, columns.pop(6))
+    df = df[columns]
+    columns.insert(5, columns.pop(9))
+    df = df[columns]
+    columns.insert(9, columns.pop(6))
+    df = df[columns]
+    df = df.rename(columns={
+        'datetime':'date',
+        'windspeed': 'wind_speed',
+        'temp': 'mean_temp',
+        'solarradiation':'global_radiation',
+        'precip':'percipitation',
+        'sealevelpressure':'pressure',
+        'visibility':'minimum_visibility'
+    })
+    df['date'] = pd.to_datetime(df['date'])
+    df['weekday'] = df['date'].dt.day_name()
+    df['wind_speed'] = (df['wind_speed'] / 3.6) * 10
+    df['mean_temp'] = df['mean_temp'] * 10
+    df['minimum_visibility'] = df['minimum_visibility'] * 10
+    df['percipitation'] = df['percipitation'] * 10
+    df['pressure'] = df['pressure'] * 10
+    df['wind_speed'] = df['wind_speed'].astype(int)
+    df['mean_temp'] = df['mean_temp'].astype(int)
+    df['minimum_visibility'] = df['minimum_visibility'].astype(int)
+    df['percipitation'] = df['percipitation'].astype(int)
+    df['pressure'] = df['pressure'].astype(int)
+    df['humidity'] = df['humidity'].astype(int)
+    df['global_radiation'] = df['global_radiation'].astype(int)
+    df.to_csv('recorded_data.csv', index=False)
+def insert_pollution(NO2, O3):
+    file_path = 'recorded_data.csv'
+    df = pd.read_csv(file_path)
+    start_index = 0
+    while NO2:
+        df.loc[start_index, 'NO2'] = NO2.pop()
+        start_index += 1
+    start_index = 0
+    while O3:
+        df.loc[start_index, 'O3'] = O3.pop()
+        start_index += 1
+        df.to_csv('recorded_data.csv', index=False)
+api_call()
+NO2, O3 = clean_values()
+add_columns()
+scale()
+insert_pollution(NO2, O3)
+os.remove('combined_data.csv')
+os.remove('weather_data.csv')

data_loading.py → src/data_loading.py RENAMED Viewed

File without changes

helper_functions.py → src/helper_functions.py RENAMED Viewed

@@ -1,22 +1,4 @@
 import streamlit as st
-import joblib
-import pandas as pd
-@st.cache_resource(ttl=6*300)  # Reruns every 6 hours
-def run_model():
-    # Load or train your model (pretrained model in this case)
-    model = joblib.load("linear_regression_model.pkl")
-    # Static input values
-    input_data = pd.DataFrame({
-        'Temperature': [20.0],
-        'Wind Speed': [10.0],
-        'Humidity': [50.0]
-    })
-    # Run the model with static input
-    prediction = model.predict(input_data)
-    return prediction
 # Custom function to create styled metric boxes with subscripts, smaller label, and larger metric
 def custom_metric_box(label, value, delta):

 import streamlit as st
 # Custom function to create styled metric boxes with subscripts, smaller label, and larger metric
 def custom_metric_box(label, value, delta):

src/models_loading.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import os
+import joblib
+import pandas as pd
+import streamlit as st
+from dotenv import load_dotenv
+from huggingface_hub import hf_hub_download, login
+def load_model(particle):
+    load_dotenv()
+    login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
+    repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
+    if particle == "O3":
+        file_name = "O3_svr_model.pkl"
+    elif particle == "NO2":
+        file_name == "hehehe"
+    model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
+    model = joblib.load(model_path)
+    return model
+@st.cache_resource(ttl=6 * 300)  # Reruns every 6 hours
+def run_model(particle):
+    model = load_model(particle)
+    # Static input values
+    input_data = pd.DataFrame(
+        {"Temperature": [20.0], "Wind Speed": [10.0], "Humidity": [50.0]}
+    )
+    # Run the model with static input
+    prediction = model.predict(input_data)
+    return prediction