Spaces:
Sleeping
Sleeping
# app.py
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from sklearn.cluster import KMeans | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Page-level configuration; this is the first Streamlit call in the script.
# NOTE(review): the emoji in the icon/title strings look mis-encoded in this
# copy of the file -- confirm the source encoding before shipping.
st.set_page_config(
    page_title="K-Means Clustering App",
    page_icon="π€",
    layout="wide",
)

# Headline followed by a one-line description of what the app does.
st.title("π€ K-Means Clustering Explorer")
st.write("This app performs **K-Means Clustering** on a customer segmentation dataset.")
# Load dataset (local file)
def load_data() -> pd.DataFrame:
    """Read ``Mall_Customers.csv`` and return its contents as a DataFrame.

    The path is relative, so it is resolved against the current working
    directory of the running process.
    """
    return pd.read_csv("Mall_Customers.csv")
# Load the customer table for this script run.
data = load_data()

# Select features: clustering uses only these two numeric columns. The list is
# named once so the feature selection and the per-cluster summary stay in sync.
FEATURE_COLUMNS = ['Annual Income (k$)', 'Spending Score (1-100)']
features = data[FEATURE_COLUMNS]

# Sidebar: let the user pick K (min 1, max 10, default 3).
st.sidebar.header("Settings")
k = st.sidebar.slider("Select number of clusters (K)", 1, 10, 3)

# Perform KMeans clustering for the selected K.
# n_init is passed explicitly: scikit-learn changed its default from 10 to
# "auto" in 1.4 (with a FutureWarning in 1.2/1.3), so relying on the default
# makes the fitted clusters depend on the installed version.
kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
clusters = kmeans.fit_predict(features)
# The label column is attached to `data` itself; later code reads it from there.
data['Cluster'] = clusters

# Calculate Elbow Method data: within-cluster sum of squares (inertia) for
# K = 1..10, independent of the K chosen in the sidebar.
wcss = [
    KMeans(n_clusters=n_clusters, init='k-means++', n_init=10, random_state=42)
    .fit(features)
    .inertia_
    for n_clusters in range(1, 11)
]

# Analyze clusters: mean of each feature per assigned cluster label.
cluster_summary = data.groupby('Cluster')[FEATURE_COLUMNS].mean()
def interpret_cluster(income: float, spending: float) -> str:
    """Return a descriptive segment label for a cluster's average profile.

    Args:
        income: Average annual income of the cluster (compared against the
            thresholds 70 and 40).
        spending: Average spending score of the cluster (compared against the
            thresholds 60, 50 and 40).

    Returns:
        One of five label strings. Any combination that matches none of the
        four explicit income/spending bands gets the "Standard Customers" label.
    """
    high_income = income >= 70
    low_income = income <= 40

    # Checks run in a fixed order; the first matching band wins.
    if high_income and spending >= 50:
        return "π Premium Customers (High Income, High Spending)"
    if low_income and spending >= 60:
        return "π Potential Risk Customers (Low Income, High Spending)"
    if high_income and spending <= 40:
        return "πΌ Careful Spenders (High Income, Low Spending)"
    if low_income and spending <= 40:
        return "π Budget Customers (Low Income, Low Spending)"
    return "π§© Standard Customers"
# Create Tabs: one per view of the analysis.
tab1, tab2, tab3, tab4 = st.tabs(["π Raw Dataset", "π Elbow Method", "π― Clustered Customers", "π Cluster Explanations"])

with tab1:
    st.subheader("π§Ή Raw Dataset")
    # `data` may already carry the derived 'Cluster' column; drop it for display
    # so this tab shows only the columns that came from the file.
    # errors='ignore' keeps this working when the column is absent.
    st.dataframe(data.drop(columns='Cluster', errors='ignore').head())

with tab2:
    st.subheader("π Elbow Method (to find optimal K)")
    fig, ax = plt.subplots()
    # x-axis is derived from the number of WCSS values (K starts at 1) so the
    # plot cannot drift out of step with the list it draws.
    ax.plot(range(1, len(wcss) + 1), wcss, marker='o')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS (Within Cluster Sum of Squares)')
    ax.set_title('The Elbow Method')
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # close after rendering so figures do not pile up across script runs.
    plt.close(fig)

with tab3:
    st.subheader("π― Clustered Customers")
    fig2, ax2 = plt.subplots()
    # One colour per cluster label.
    palette = sns.color_palette("bright", k)
    sns.scatterplot(
        x='Annual Income (k$)',
        y='Spending Score (1-100)',
        hue='Cluster',
        palette=palette,
        data=data,
        ax=ax2,
        s=100,
    )
    # Overlay the fitted centroids; column 0 is income and column 1 is spending
    # score, matching the feature order used for fitting.
    ax2.scatter(
        kmeans.cluster_centers_[:, 0],
        kmeans.cluster_centers_[:, 1],
        s=300,
        c='black',
        marker='X',
        label='Centroids',
    )
    ax2.legend()
    ax2.set_title('Customer Segments')
    st.pyplot(fig2)
    # Same reason as for the elbow figure: release it once rendered.
    plt.close(fig2)

with tab4:
    st.subheader("π Cluster Explanations")
    # One line of text per cluster, derived from its mean income and spending.
    for cluster_num, row in cluster_summary.iterrows():
        explanation = interpret_cluster(row['Annual Income (k$)'], row['Spending Score (1-100)'])
        st.markdown(f"**Cluster {cluster_num}:** {explanation}")
    # Per-cluster means, with the maximum of each column highlighted.
    st.dataframe(cluster_summary.style.highlight_max(axis=0))

# Footer
st.markdown("---")
st.caption("Made with β€οΈ using Streamlit")