{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# Third-party\n",
"import pandas as pd\n",
"\n",
"# Project-local: every helper the cells below call is imported here so the\n",
"# notebook survives Restart Kernel -> Run All.\n",
"from src.features_pipeline import create_features\n",
"from src.predict import get_data_and_predictions, run_model\n",
"\n",
"# NOTE(review): a later cell also calls get_past_data(), whose defining module\n",
"# is not visible from this notebook -- TODO confirm its location and import it here."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Length of values (0) does not match length of index (11)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m week_data, predictions_O3, predictions_NO2 \u001b[38;5;241m=\u001b[39m \u001b[43mget_data_and_predictions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:41\u001b[0m, in \u001b[0;36mget_data_and_predictions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 37\u001b[0m PREDICTIONS_FILE \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions_history.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 39\u001b[0m week_data \u001b[38;5;241m=\u001b[39m get_combined_data()\n\u001b[1;32m---> 41\u001b[0m o3_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweek_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 42\u001b[0m no2_predictions \u001b[38;5;241m=\u001b[39m run_model(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNO2\u001b[39m\u001b[38;5;124m\"\u001b[39m, data\u001b[38;5;241m=\u001b[39mweek_data)\n\u001b[0;32m 44\u001b[0m prediction_data \u001b[38;5;241m=\u001b[39m []\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[1;34m(particle, data)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[1;32m---> 28\u001b[0m input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 29\u001b[0m model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[0;32m 30\u001b[0m prediction \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(input_data)\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\features_pipeline.py:60\u001b[0m, in \u001b[0;36mcreate_features\u001b[1;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[0;32m 55\u001b[0m data[\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfeature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_sma_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msma_days\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 56\u001b[0m data[feature]\u001b[38;5;241m.\u001b[39mrolling(window\u001b[38;5;241m=\u001b[39msma_days)\u001b[38;5;241m.\u001b[39mmean()\n\u001b[0;32m 57\u001b[0m )\n\u001b[0;32m 59\u001b[0m \u001b[38;5;66;03m# Create particle data (NO2 and O3) from the same time last year\u001b[39;00m\n\u001b[1;32m---> 60\u001b[0m past_data \u001b[38;5;241m=\u001b[39m \u001b[43mget_past_combined_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 62\u001b[0m \u001b[38;5;66;03m# Today last year\u001b[39;00m\n\u001b[0;32m 63\u001b[0m data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3_last_year\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m past_data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39miloc[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m4\u001b[39m]\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\past_data_api_calls.py:99\u001b[0m, in \u001b[0;36mget_past_combined_data\u001b[1;34m()\u001b[0m\n\u001b[0;32m 96\u001b[0m NO2_df, O3_df \u001b[38;5;241m=\u001b[39m get_past_pollution_data()\n\u001b[0;32m 98\u001b[0m combined_df \u001b[38;5;241m=\u001b[39m weather_df\n\u001b[1;32m---> 99\u001b[0m \u001b[43mcombined_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNO2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m NO2_df\n\u001b[0;32m 100\u001b[0m combined_df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m O3_df\n\u001b[0;32m 102\u001b[0m \u001b[38;5;66;03m# Apply scaling and renaming similar to the scale function from previous code\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4311\u001b[0m, in \u001b[0;36mDataFrame.__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4308\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_setitem_array([key], value)\n\u001b[0;32m 4309\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 4310\u001b[0m \u001b[38;5;66;03m# set column\u001b[39;00m\n\u001b[1;32m-> 4311\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_item\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4524\u001b[0m, in \u001b[0;36mDataFrame._set_item\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4514\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_set_item\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, value) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 4515\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4516\u001b[0m \u001b[38;5;124;03m Add series to DataFrame in specified column.\u001b[39;00m\n\u001b[0;32m 4517\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4522\u001b[0m \u001b[38;5;124;03m ensure homogeneity.\u001b[39;00m\n\u001b[0;32m 4523\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4524\u001b[0m value, refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sanitize_column\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4526\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 4527\u001b[0m key \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[0;32m 4528\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m value\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 4529\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value\u001b[38;5;241m.\u001b[39mdtype, ExtensionDtype)\n\u001b[0;32m 4530\u001b[0m ):\n\u001b[0;32m 4531\u001b[0m \u001b[38;5;66;03m# broadcast across multiple columns if necessary\u001b[39;00m\n\u001b[0;32m 4532\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mis_unique \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns, MultiIndex):\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:5266\u001b[0m, in \u001b[0;36mDataFrame._sanitize_column\u001b[1;34m(self, value)\u001b[0m\n\u001b[0;32m 5263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _reindex_for_setitem(value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex)\n\u001b[0;32m 5265\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_list_like(value):\n\u001b[1;32m-> 5266\u001b[0m \u001b[43mcom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequire_length_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 5267\u001b[0m arr \u001b[38;5;241m=\u001b[39m sanitize_array(value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, allow_2d\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 5268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 5269\u001b[0m \u001b[38;5;28misinstance\u001b[39m(value, Index)\n\u001b[0;32m 5270\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m value\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobject\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 5273\u001b[0m \u001b[38;5;66;03m# TODO: Remove kludge in sanitize_array for string mode when enforcing\u001b[39;00m\n\u001b[0;32m 5274\u001b[0m \u001b[38;5;66;03m# this deprecation\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\common.py:573\u001b[0m, in \u001b[0;36mrequire_length_match\u001b[1;34m(data, index)\u001b[0m\n\u001b[0;32m 569\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 570\u001b[0m \u001b[38;5;124;03mCheck the length of data matches the length of the index.\u001b[39;00m\n\u001b[0;32m 571\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 572\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(index):\n\u001b[1;32m--> 573\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 574\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLength of values \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 575\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 576\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdoes not match length of index \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 577\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(index)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 578\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: Length of values (0) does not match length of index (11)"
]
}
],
"source": [
"week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"week_data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(\"dataset.csv\")\n",
"target_particle = \"O3\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" NO2 | \n",
" O3 | \n",
" wind_speed | \n",
" mean_temp | \n",
" global_radiation | \n",
" percipitation | \n",
" pressure | \n",
" minimum_visibility | \n",
" humidity | \n",
" weekday | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2024-10-16 | \n",
" 22.602712 | \n",
" 22.881288 | \n",
" 61 | \n",
" 151 | \n",
" 40 | \n",
" 0 | \n",
" 10103 | \n",
" 358 | \n",
" 82 | \n",
" Wednesday | \n",
"
\n",
" \n",
" 1 | \n",
" 2024-10-17 | \n",
" 23.104327 | \n",
" 23.038638 | \n",
" 51 | \n",
" 169 | \n",
" 43 | \n",
" 6 | \n",
" 10100 | \n",
" 371 | \n",
" 86 | \n",
" Thursday | \n",
"
\n",
" \n",
" 2 | \n",
" 2024-10-18 | \n",
" 23.682857 | \n",
" 23.716611 | \n",
" 21 | \n",
" 156 | \n",
" 42 | \n",
" 39 | \n",
" 10140 | \n",
" 64 | \n",
" 97 | \n",
" Friday | \n",
"
\n",
" \n",
" 3 | \n",
" 2024-10-19 | \n",
" 24.532039 | \n",
" 23.604723 | \n",
" 43 | \n",
" 147 | \n",
" 43 | \n",
" 28 | \n",
" 10140 | \n",
" 236 | \n",
" 92 | \n",
" Saturday | \n",
"
\n",
" \n",
" 4 | \n",
" 2024-10-20 | \n",
" 23.019102 | \n",
" 24.173377 | \n",
" 68 | \n",
" 145 | \n",
" 0 | \n",
" 0 | \n",
" 10160 | \n",
" 241 | \n",
" 82 | \n",
" Sunday | \n",
"
\n",
" \n",
" 5 | \n",
" 2024-10-21 | \n",
" 21.275629 | \n",
" 25.058736 | \n",
" 58 | \n",
" 144 | \n",
" 27 | \n",
" 43 | \n",
" 10206 | \n",
" 220 | \n",
" 92 | \n",
" Monday | \n",
"
\n",
" \n",
" 6 | \n",
" 2024-10-22 | \n",
" 22.334375 | \n",
" 24.594219 | \n",
" 76 | \n",
" 123 | \n",
" 57 | \n",
" 12 | \n",
" 10265 | \n",
" 100 | \n",
" 87 | \n",
" Tuesday | \n",
"
\n",
" \n",
" 7 | \n",
" 2024-10-23 | \n",
" 24.261733 | \n",
" 23.560000 | \n",
" 31 | \n",
" 115 | \n",
" 7 | \n",
" 0 | \n",
" 10328 | \n",
" 105 | \n",
" 95 | \n",
" Wednesday | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date NO2 O3 wind_speed mean_temp global_radiation \\\n",
"0 2024-10-16 22.602712 22.881288 61 151 40 \n",
"1 2024-10-17 23.104327 23.038638 51 169 43 \n",
"2 2024-10-18 23.682857 23.716611 21 156 42 \n",
"3 2024-10-19 24.532039 23.604723 43 147 43 \n",
"4 2024-10-20 23.019102 24.173377 68 145 0 \n",
"5 2024-10-21 21.275629 25.058736 58 144 27 \n",
"6 2024-10-22 22.334375 24.594219 76 123 57 \n",
"7 2024-10-23 24.261733 23.560000 31 115 7 \n",
"\n",
" percipitation pressure minimum_visibility humidity weekday \n",
"0 0 10103 358 82 Wednesday \n",
"1 6 10100 371 86 Thursday \n",
"2 39 10140 64 97 Friday \n",
"3 28 10140 236 92 Saturday \n",
"4 0 10160 241 82 Sunday \n",
"5 43 10206 220 92 Monday \n",
"6 12 10265 100 87 Tuesday \n",
"7 0 10328 105 95 Wednesday "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of rows with missing values dropped: 7\n"
]
}
],
"source": [
"# Pass a copy so feature engineering cannot alter the frame displayed above,\n",
"# keeping this cell safe to re-run and leaving `data` intact for later cells.\n",
"# NOTE(review): the stored traceback shows create_features assigning columns on\n",
"# its `data` argument; whether it copies first is not visible here -- TODO confirm.\n",
"input_data = create_features(\n",
"    data=data.copy(),\n",
"    target_particle=target_particle,\n",
"    lag_days=7,\n",
"    sma_days=7,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" NO2 | \n",
" O3 | \n",
" wind_speed | \n",
" mean_temp | \n",
" global_radiation | \n",
" percipitation | \n",
" pressure | \n",
" minimum_visibility | \n",
" humidity | \n",
" weekday_sin | \n",
" ... | \n",
" O3_last_year_4_days_before | \n",
" NO2_last_year_4_days_before | \n",
" O3_last_year_5_days_before | \n",
" NO2_last_year_5_days_before | \n",
" O3_last_year_6_days_before | \n",
" NO2_last_year_6_days_before | \n",
" O3_last_year_7_days_before | \n",
" NO2_last_year_7_days_before | \n",
" O3_last_year_3_days_after | \n",
" NO2_last_year_3_days_after | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -0.126371 | \n",
" -0.855455 | \n",
" -0.206181 | \n",
" 0.082314 | \n",
" -1.330268 | \n",
" -0.493936 | \n",
" 1.783274 | \n",
" 2.813837 | \n",
" 1.547919 | \n",
" 1.37753 | \n",
" ... | \n",
" -1.036205 | \n",
" -0.802392 | \n",
" -0.883032 | \n",
" -0.968984 | \n",
" 0.333776 | \n",
" -1.446199 | \n",
" -1.180992 | \n",
" -0.54567 | \n",
" -1.15814 | \n",
" -0.358079 | \n",
"
\n",
" \n",
"
\n",
"
1 rows × 87 columns
\n",
"
"
],
"text/plain": [
" NO2 O3 wind_speed mean_temp global_radiation percipitation \\\n",
"0 -0.126371 -0.855455 -0.206181 0.082314 -1.330268 -0.493936 \n",
"\n",
" pressure minimum_visibility humidity weekday_sin ... \\\n",
"0 1.783274 2.813837 1.547919 1.37753 ... \n",
"\n",
" O3_last_year_4_days_before NO2_last_year_4_days_before \\\n",
"0 -1.036205 -0.802392 \n",
"\n",
" O3_last_year_5_days_before NO2_last_year_5_days_before \\\n",
"0 -0.883032 -0.968984 \n",
"\n",
" O3_last_year_6_days_before NO2_last_year_6_days_before \\\n",
"0 0.333776 -1.446199 \n",
"\n",
" O3_last_year_7_days_before NO2_last_year_7_days_before \\\n",
"0 -1.180992 -0.54567 \n",
"\n",
" O3_last_year_3_days_after NO2_last_year_3_days_after \n",
"0 -1.15814 -0.358079 \n",
"\n",
"[1 rows x 87 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"input_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#prediction = run_model(particle=\"O3\", data=df)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" NO2 | \n",
" O3 | \n",
" wind_speed | \n",
" mean_temp | \n",
" global_radiation | \n",
" percipitation | \n",
" pressure | \n",
" minimum_visibility | \n",
" humidity | \n",
" weekday | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-10-16 | \n",
" 17.958784 | \n",
" 32.611400 | \n",
" 31 | \n",
" 90 | \n",
" 68 | \n",
" 9 | \n",
" 1022 | \n",
" 348 | \n",
" 88 | \n",
" Monday | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-10-17 | \n",
" 10.842703 | \n",
" 39.812600 | \n",
" 61 | \n",
" 85 | \n",
" 75 | \n",
" 0 | \n",
" 1019 | \n",
" 348 | \n",
" 84 | \n",
" Tuesday | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-10-18 | \n",
" 17.970267 | \n",
" 31.779024 | \n",
" 71 | \n",
" 90 | \n",
" 71 | \n",
" 23 | \n",
" 1006 | \n",
" 238 | \n",
" 77 | \n",
" Wednesday | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-10-19 | \n",
" 17.233056 | \n",
" 18.715600 | \n",
" 61 | \n",
" 145 | \n",
" 39 | \n",
" 114 | \n",
" 990 | \n",
" 212 | \n",
" 94 | \n",
" Thursday | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-10-20 | \n",
" 15.023600 | \n",
" 22.040000 | \n",
" 71 | \n",
" 119 | \n",
" 7 | \n",
" 204 | \n",
" 981 | \n",
" 104 | \n",
" 97 | \n",
" Friday | \n",
"
\n",
" \n",
" 5 | \n",
" 2023-10-21 | \n",
" 8.723378 | \n",
" 48.334400 | \n",
" 61 | \n",
" 131 | \n",
" 39 | \n",
" 35 | \n",
" 989 | \n",
" 277 | \n",
" 88 | \n",
" Saturday | \n",
"
\n",
" \n",
" 6 | \n",
" 2023-10-22 | \n",
" 20.634267 | \n",
" 15.586000 | \n",
" 71 | \n",
" 121 | \n",
" 55 | \n",
" 39 | \n",
" 1003 | \n",
" 323 | \n",
" 87 | \n",
" Sunday | \n",
"
\n",
" \n",
" 7 | \n",
" 2023-10-23 | \n",
" 15.115600 | \n",
" 24.628085 | \n",
" 50 | \n",
" 99 | \n",
" 43 | \n",
" 5 | \n",
" 1011 | \n",
" 59 | \n",
" 95 | \n",
" Monday | \n",
"
\n",
" \n",
" 8 | \n",
" 2023-10-24 | \n",
" 22.885676 | \n",
" 27.117600 | \n",
" 61 | \n",
" 116 | \n",
" 32 | \n",
" 65 | \n",
" 1001 | \n",
" 231 | \n",
" 92 | \n",
" Tuesday | \n",
"
\n",
" \n",
" 9 | \n",
" 2023-10-25 | \n",
" 21.531757 | \n",
" 13.321600 | \n",
" 50 | \n",
" 93 | \n",
" 14 | \n",
" 153 | \n",
" 996 | \n",
" 157 | \n",
" 96 | \n",
" Wednesday | \n",
"
\n",
" \n",
" 10 | \n",
" 2023-10-26 | \n",
" 23.072267 | \n",
" 16.154167 | \n",
" 31 | \n",
" 94 | \n",
" 36 | \n",
" 1 | \n",
" 995 | \n",
" 48 | \n",
" 97 | \n",
" Thursday | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date NO2 O3 wind_speed mean_temp global_radiation \\\n",
"0 2023-10-16 17.958784 32.611400 31 90 68 \n",
"1 2023-10-17 10.842703 39.812600 61 85 75 \n",
"2 2023-10-18 17.970267 31.779024 71 90 71 \n",
"3 2023-10-19 17.233056 18.715600 61 145 39 \n",
"4 2023-10-20 15.023600 22.040000 71 119 7 \n",
"5 2023-10-21 8.723378 48.334400 61 131 39 \n",
"6 2023-10-22 20.634267 15.586000 71 121 55 \n",
"7 2023-10-23 15.115600 24.628085 50 99 43 \n",
"8 2023-10-24 22.885676 27.117600 61 116 32 \n",
"9 2023-10-25 21.531757 13.321600 50 93 14 \n",
"10 2023-10-26 23.072267 16.154167 31 94 36 \n",
"\n",
" percipitation pressure minimum_visibility humidity weekday \n",
"0 9 1022 348 88 Monday \n",
"1 0 1019 348 84 Tuesday \n",
"2 23 1006 238 77 Wednesday \n",
"3 114 990 212 94 Thursday \n",
"4 204 981 104 97 Friday \n",
"5 35 989 277 88 Saturday \n",
"6 39 1003 323 87 Sunday \n",
"7 5 1011 59 95 Monday \n",
"8 65 1001 231 92 Tuesday \n",
"9 153 996 157 96 Wednesday \n",
"10 1 995 48 97 Thursday "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_past_data()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-10-23 19:40:20.321 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
"2024-10-23 19:40:20.322 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
"2024-10-23 19:40:20.323 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of rows with missing values dropped: 7\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-10-23 19:40:34.183 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
"2024-10-23 19:40:34.184 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
]
}
],
"source": [
"# run_model forwards `data` to create_features (see the stored traceback), so a\n",
"# copy is passed to keep the notebook-level frame unchanged across re-runs.\n",
"prediction = run_model(particle=target_particle, data=data.copy())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[19.90814701, 8.8039613 , 26.57711386]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prediction"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}