Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.preprocessing import StandardScaler, LabelEncoder | |
| from sklearn.cluster import KMeans | |
| from sklearn.metrics import silhouette_score | |
# Load dataset
def load_data(path: str = "datasets/Mall_Customers.csv") -> pd.DataFrame:
    """Read the mall-customers CSV into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to the path the app has
            always used, so existing ``load_data()`` calls behave as before.

    Returns:
        The file contents exactly as parsed by ``pd.read_csv``; no columns
        are dropped or encoded here.
    """
    return pd.read_csv(path)
# Load the raw table and build the scaled feature matrix used for clustering.
df = load_data()
df.drop(columns=["CustomerID"], inplace=True)  # Drop non-essential column

# LabelEncoder assigns integer codes in sorted class order. Assuming the
# column holds the labels "Female" and "Male", that yields Female=0, Male=1
# (not Male=0 as one might expect from the order they appear in the data).
le = LabelEncoder()
df["Genre"] = le.fit_transform(df["Genre"])

# Standardise every remaining column; keep the fitted scaler so new rows can
# be transformed with the same mean/variance.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)
# Elbow method: record the within-cluster sum of squares for K = 1..10.
k_values = range(1, 11)
wcss = []
for k in k_values:
    # fit() returns the estimator itself, so this binds the fitted model.
    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=1, n_init=10).fit(df_scaled)
    wcss.append(kmeans.inertia_)
# Final model. K is fixed at 5 (assumed from the elbow curve, not computed).
k_optimal = 5
kmeans = KMeans(n_clusters=k_optimal, init='k-means++', random_state=1, n_init=10).fit(df_scaled)

# Attach each row's cluster id to the unscaled frame.
df['Cluster'] = kmeans.labels_

# Silhouette score of the final labelling, computed on the scaled features.
sil_score = silhouette_score(df_scaled, labels=kmeans.labels_)
# Streamlit App: page heading followed by the three top-level tabs.
st.title("Clustering: Mall Customers Segmentation")
st.caption("Dataset: Mall_Customers.csv")
tab_names = ["Model Performance", "Dataset", "Customer Segment Predictor"]
tab1, tab2, tab3 = st.tabs(tab_names)
# "Model Performance" tab: silhouette score, elbow curve and cluster scatter.
with tab1:
    st.header("Model Performance")
    st.write(f"**Silhouette Score:** {sil_score:.4f}")

    # Elbow curve. Draw through the Axes object rather than the implicit
    # pyplot "current figure", so the plot cannot land on another figure.
    fig, ax = plt.subplots()
    ax.plot(k_values, wcss, marker='o', linestyle='--')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS (Within-Cluster Sum of Squares)')
    ax.set_title('Elbow Method for Optimal K')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release it once it
    # has been rendered so repeated script runs do not accumulate figures.
    plt.close(fig)

    st.subheader("Customer Segments Visualization")
    fig, ax = plt.subplots()
    sns.scatterplot(
        x=df['Annual Income (k$)'],
        y=df['Spending Score (1-100)'],
        hue=df['Cluster'],
        palette='viridis',
        ax=ax,
    )
    ax.set_xlabel('Annual Income (k$)')
    ax.set_ylabel('Spending Score')
    ax.set_title('Customer Segments')
    st.pyplot(fig)
    plt.close(fig)

    # NOTE(review): the source's indentation was lost; the divider is assumed
    # to belong inside this tab - confirm.
    st.divider()
# "Dataset" tab: correlation heatmap plus the full table in either row order.
with tab2:
    st.header("Dataset")

    def corr_matrix(data, title):
        """Render an annotated correlation heatmap of *data*'s numeric columns.

        Args:
            data: DataFrame to analyse; non-numeric columns are ignored.
            title: Text placed above the heatmap.
        """
        numeric = data.select_dtypes(include=["number"])
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(numeric.corr(), annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax)
        ax.set_title(title)
        st.pyplot(fig)
        # Close after rendering so repeated script runs do not pile up
        # open figures inside pyplot.
        plt.close(fig)

    corr_matrix(df, "Correlation Matrix")

    view_type = st.radio("Order:", ["Top -> Bottom", "Bottom -> Top"])
    if view_type == "Top -> Bottom":
        # head(len(df)) selected every row, so show the frame directly.
        st.dataframe(df)
    else:
        # Every row, last to first.
        st.dataframe(df.iloc[::-1])

    # NOTE(review): the source's indentation was lost; the divider is assumed
    # to belong inside this tab - confirm.
    st.divider()
# "Customer Segment Predictor" tab: collect one customer's attributes, assign
# them to a cluster with the fitted model, and show where they fall.
with tab3:
    st.header("Customer Segment Predictor")

    # Slider bounds and defaults come from the data; int() truncates any
    # fractional min/max/median because the sliders work on whole numbers.
    income = st.slider("Annual Income (k$)", int(df['Annual Income (k$)'].min()), int(df['Annual Income (k$)'].max()), int(df['Annual Income (k$)'].median()))
    spending = st.slider("Spending Score (1-100)", int(df['Spending Score (1-100)'].min()), int(df['Spending Score (1-100)'].max()), int(df['Spending Score (1-100)'].median()))
    age = st.slider("Age", int(df['Age'].min()), int(df['Age'].max()), int(df['Age'].median()))
    gender = st.radio("Gender", ["Male", "Female"])

    # Encode gender with the same encoder used on the training data, then
    # build the row already numeric, in the column order the scaler was
    # fitted with (no placeholder string that is overwritten afterwards).
    gender_code = le.transform([gender])[0]
    input_data = pd.DataFrame(
        [[gender_code, age, income, spending]],
        columns=["Genre", "Age", "Annual Income (k$)", "Spending Score (1-100)"],
    )
    input_scaled = scaler.transform(input_data)
    predicted_cluster = kmeans.predict(input_scaled)[0]

    st.subheader("Predicted Customer Segment")
    st.markdown(f"<h1 style='color:green;'>Cluster {predicted_cluster}</h1>", unsafe_allow_html=True)

    # Graph to visualize input placement. Draw through the Axes object
    # rather than the implicit pyplot "current figure".
    fig, ax = plt.subplots()
    sns.scatterplot(
        x=df['Annual Income (k$)'],
        y=df['Spending Score (1-100)'],
        hue=df['Cluster'],
        palette='viridis',
        alpha=0.6,
        ax=ax,
    )
    ax.scatter(income, spending, color='red', label='Your Input', edgecolors='black', s=100)
    ax.set_xlabel('Annual Income (k$)')
    ax.set_ylabel('Spending Score')
    ax.set_title('Customer Segments with Your Input')
    ax.legend()
    st.pyplot(fig)
    # Release the figure once rendered so repeated script runs do not
    # accumulate open figures inside pyplot.
    plt.close(fig)

    # NOTE(review): the source's indentation was lost; the divider is assumed
    # to belong inside this tab - confirm.
    st.divider()