import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler


# Location of the CSV file the dashboard analyses (relative to the working directory).
DATA_PATH = "./World Energy Consumption.csv"


def prepare_numeric_data(df: pd.DataFrame):
    """Select the numeric columns of *df* and drop rows incomplete in any of them.

    Args:
        df: The raw dataset. It is not modified.

    Returns:
        A ``(clean_df, numerical_cols)`` pair: ``clean_df`` keeps every column
        of *df* but only the rows with no missing value in a numeric column;
        ``numerical_cols`` lists the numeric column names in their original
        order.
    """
    numerical_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    clean_df = df.dropna(subset=numerical_cols)
    return clean_df, numerical_cols


def detect_anomalies(df: pd.DataFrame, numerical_cols, contamination: float):
    """Label each row of *df* as ``"Normal"`` or ``"Anomaly"``.

    The numeric columns are standardized and fed to an Isolation Forest with a
    fixed ``random_state`` so repeated runs give the same labels.

    Args:
        df: Dataset with no missing values in *numerical_cols*; must contain
            at least one row.
        numerical_cols: Names of the columns used as model features.
        contamination: Value passed to ``IsolationForest(contamination=...)``.

    Returns:
        A copy of *df* with an added ``"Anomaly"`` column.
    """
    scaled_data = StandardScaler().fit_transform(df[numerical_cols])
    model = IsolationForest(contamination=contamination, random_state=42)

    labelled = df.copy()
    # The code maps the model's two outputs, 1 and -1, to readable labels.
    labelled["Anomaly"] = model.fit_predict(scaled_data)
    labelled["Anomaly"] = labelled["Anomaly"].map({1: "Normal", -1: "Anomaly"})
    return labelled


def main():
    """Render the anomaly-detection dashboard."""
    st.set_page_config(page_title="Energy Consumption Anomaly Detection", layout="wide")
    # The title icon had been mis-decoded into "β‘"; restored to the lightning bolt.
    st.title("⚡ Energy Consumption Anomaly Detection Dashboard")

    try:
        raw_df = pd.read_csv(DATA_PATH)
    except FileNotFoundError:
        st.error(f"Dataset file not found: {DATA_PATH}")
        st.stop()

    st.sidebar.header("Settings")
    contamination = st.sidebar.slider(
        "Select Contamination (Anomaly Percentage)", 0.01, 0.1, 0.02, 0.01
    )

    df, numerical_cols = prepare_numeric_data(raw_df)

    # The scaler and the model both fail on an empty feature matrix, so stop
    # with a readable message instead of a traceback.
    if not numerical_cols:
        st.error("The dataset has no numeric columns to analyse.")
        st.stop()
    if df.empty:
        st.error("No rows remain after dropping records with missing numeric values.")
        st.stop()

    df = detect_anomalies(df, numerical_cols, contamination)

    # NOTE(review): the original tab icons were lost to a mis-decoding (only a
    # stray "π" survived), so these emoji are stand-ins - confirm the intended ones.
    tab1, tab2, tab3 = st.tabs(
        ["📊 Dataset Overview", "🔍 Anomaly Detection", "📈 Visualization"]
    )

    with tab1:
        st.subheader("Dataset Overview")
        st.write("This dataset contains information on global energy consumption trends.")
        st.dataframe(df.head())
        st.write(f"Total Records: {df.shape[0]}")

    with tab2:
        st.subheader("Anomaly Detection Results")
        anomalies = df[df["Anomaly"] == "Anomaly"]
        st.write(f"Total Anomalies Detected: {len(anomalies)}")
        st.dataframe(anomalies)

    with tab3:
        st.subheader("Energy Consumption Anomaly Visualization")
        feature_x = st.selectbox("Select X-axis Feature", numerical_cols, index=0)
        # Default to the second numeric column, falling back to the first when
        # only one exists (a fixed index of 1 would be out of range).
        feature_y = st.selectbox(
            "Select Y-axis Feature",
            numerical_cols,
            index=min(1, len(numerical_cols) - 1),
        )

        fig = px.scatter(
            df,
            x=feature_x,
            y=feature_y,
            color="Anomaly",
            title="Anomaly Detection in Energy Consumption",
            color_discrete_map={"Normal": "blue", "Anomaly": "red"},
        )
        st.plotly_chart(fig, use_container_width=True)


# Streamlit executes the script as ``__main__``, so the dashboard still renders
# on every run while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()