# app.py
"""K-Means clustering explorer for a customer segmentation dataset.

Streamlit page that loads ``Mall_Customers.csv``, clusters customers on
annual income and spending score with K-Means, and shows the raw data, an
elbow plot, the clustered scatter plot and a short interpretation of each
cluster.
"""

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

DATA_PATH = "Mall_Customers.csv"
INCOME_COL = 'Annual Income (k$)'
SPENDING_COL = 'Spending Score (1-100)'
FEATURE_COLS = [INCOME_COL, SPENDING_COL]
MAX_K = 10  # upper bound for both the K slider and the elbow sweep
RANDOM_STATE = 42

# Streamlit page settings
st.set_page_config(page_title="K-Means Clustering App", page_icon="🤖", layout="wide")

# Title
st.title("🤖 K-Means Clustering Explorer")
st.write("This app performs **K-Means Clustering** on a customer segmentation dataset.")


# Load dataset (local file)
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the customer dataset from ``DATA_PATH``.

    Returns:
        The CSV contents as a DataFrame.

    Raises:
        FileNotFoundError: if ``DATA_PATH`` does not exist.
    """
    # The path is relative, so the file must be in the working directory.
    return pd.read_csv(DATA_PATH)


@st.cache_data
def compute_wcss(features: pd.DataFrame, max_k: int = MAX_K) -> list:
    """Return the K-Means inertia (WCSS) for every K from 1 to ``max_k``.

    The sweep does not depend on the K chosen in the sidebar, so it is cached
    instead of being refit on every widget interaction.

    Args:
        features: numeric columns to cluster on.
        max_k: largest number of clusters to try (inclusive).

    Returns:
        A list of ``max_k`` floats; element ``i`` is the inertia for K = i + 1.
    """
    return [
        float(
            KMeans(n_clusters=n, init='k-means++', random_state=RANDOM_STATE)
            .fit(features)
            .inertia_
        )
        for n in range(1, max_k + 1)
    ]


def interpret_cluster(income: float, spending: float) -> str:
    """Map a cluster's mean income and spending score to a descriptive label.

    Args:
        income: mean annual income of the cluster, in k$.
        spending: mean spending score of the cluster (1-100).

    Returns:
        One of five labels. Combinations that fall between the thresholds
        (for example income strictly between 40 and 70) are labelled
        "Standard Customers".
    """
    if income >= 70 and spending >= 50:
        return "💎 Premium Customers (High Income, High Spending)"
    if income <= 40 and spending >= 60:
        return "🔔 Potential Risk Customers (Low Income, High Spending)"
    if income >= 70 and spending <= 40:
        return "💼 Careful Spenders (High Income, Low Spending)"
    if income <= 40 and spending <= 40:
        return "🛒 Budget Customers (Low Income, Low Spending)"
    return "🧩 Standard Customers"


# Show a readable message instead of a traceback when the dataset is absent.
try:
    data = load_data()
except FileNotFoundError:
    st.error(f"Dataset file '{DATA_PATH}' was not found in the working directory.")
    st.stop()

missing_cols = [col for col in FEATURE_COLS if col not in data.columns]
if missing_cols:
    st.error(f"Dataset is missing required column(s): {', '.join(missing_cols)}")
    st.stop()

# Select features
features = data[FEATURE_COLS]

# Sidebar
st.sidebar.header("Settings")
k = st.sidebar.slider("Select number of clusters (K)", 1, MAX_K, 3)

# Perform KMeans clustering
kmeans = KMeans(n_clusters=k, init='k-means++', random_state=RANDOM_STATE)
clusters = kmeans.fit_predict(features)
# Labels go into a separate frame so `data` stays exactly as loaded and the
# "Raw Dataset" tab does not show the derived Cluster column.
clustered = data.assign(Cluster=clusters)

# Calculate Elbow Method data
wcss = compute_wcss(features)

# Analyze clusters
cluster_summary = clustered.groupby('Cluster')[FEATURE_COLS].mean()

# Create Tabs
tab1, tab2, tab3, tab4 = st.tabs(
    ["📄 Raw Dataset", "📈 Elbow Method", "🎯 Clustered Customers", "📝 Cluster Explanations"]
)

with tab1:
    st.subheader("🧹 Raw Dataset")
    st.dataframe(data.head())

with tab2:
    st.subheader("📈 Elbow Method (to find optimal K)")
    fig, ax = plt.subplots()
    ax.plot(range(1, MAX_K + 1), wcss, marker='o')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS (Within Cluster Sum of Squares)')
    ax.set_title('The Elbow Method')
    st.pyplot(fig)
    # pyplot keeps figures registered until they are closed, and this script
    # is re-executed on every interaction, so release the figure once drawn.
    plt.close(fig)

with tab3:
    st.subheader("🎯 Clustered Customers")
    fig2, ax2 = plt.subplots()
    palette = sns.color_palette("bright", k)
    sns.scatterplot(
        x=INCOME_COL,
        y=SPENDING_COL,
        hue='Cluster',
        palette=palette,
        data=clustered,
        ax=ax2,
        s=100,
    )
    # Centroid columns follow the order of FEATURE_COLS: income, then spending.
    ax2.scatter(
        kmeans.cluster_centers_[:, 0],
        kmeans.cluster_centers_[:, 1],
        s=300,
        c='black',
        marker='X',
        label='Centroids',
    )
    ax2.legend()
    ax2.set_title('Customer Segments')
    st.pyplot(fig2)
    plt.close(fig2)

with tab4:
    st.subheader("📝 Cluster Explanations")
    for cluster_num, row in cluster_summary.iterrows():
        explanation = interpret_cluster(row[INCOME_COL], row[SPENDING_COL])
        st.markdown(f"**Cluster {cluster_num}:** {explanation}")
    st.dataframe(cluster_summary.style.highlight_max(axis=0))

# Footer
st.markdown("---")
st.caption("Made with ❤️ using Streamlit")