Spaces:

aikanava
/

customer_segmentation

Sleeping

App Files Files Community

customer_segmentation / app.py

aikanava

uploaded files

0e9897a 4 months ago

raw

history blame contribute delete

3.33 kB

	# app.py

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.cluster import KMeans
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Browser-tab title/icon and a full-width layout for the whole app
	st.set_page_config(
	    page_title="K-Means Clustering App",
	    page_icon="🤖",
	    layout="wide",
	)

	# Page heading followed by a one-line description of what the app does
	st.title("🤖 K-Means Clustering Explorer")
	st.write("This app performs K-Means Clustering on a customer segmentation dataset.")

	# Load dataset (local file)
	@st.cache_data
	def load_data():
	    """Read ``Mall_Customers.csv`` and return it as a DataFrame.

	    The file is looked up next to this script first, so the app also starts
	    when Streamlit is launched from a different working directory. If it is
	    not found there, the original lookup relative to the current working
	    directory is used.

	    Returns:
	        pandas.DataFrame: the parsed CSV contents.

	    Raises:
	        FileNotFoundError: if the CSV is in neither location.
	    """
	    from pathlib import Path  # local import keeps this block self-contained

	    csv_name = "Mall_Customers.csv"
	    # __file__ can be missing when the code is exec'd without a module file
	    script_dir = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
	    csv_path = script_dir / csv_name
	    if not csv_path.exists():
	        csv_path = Path(csv_name)  # fall back to the cwd-relative lookup
	    return pd.read_csv(csv_path)

	# Fetch the customer table for this run
	data = load_data()

	# The two numeric columns the clustering is computed on
	features = data[['Annual Income (k$)', 'Spending Score (1-100)']]

	# Sidebar control for the number of clusters: 1 to 10, starting at 3
	st.sidebar.header("Settings")
	k = st.sidebar.slider(
	    "Select number of clusters (K)",
	    min_value=1,
	    max_value=10,
	    value=3,
	)

	# Fit K-Means with the K chosen in the sidebar, then tag each row of
	# `data` with the index of the cluster it was assigned to
	kmeans = KMeans(
	    n_clusters=k,
	    init='k-means++',
	    random_state=42,
	)
	clusters = kmeans.fit_predict(features)
	data['Cluster'] = clusters

	# Calculate Elbow Method data
	@st.cache_data
	def compute_wcss(feature_frame, max_k=10):
	    """Return the K-Means inertia (WCSS) for every K from 1 to ``max_k``.

	    The curve does not depend on the K picked in the sidebar, so the result
	    is cached instead of refitting ``max_k`` models on every rerun.

	    Args:
	        feature_frame: the feature table to cluster.
	        max_k: largest number of clusters to evaluate (inclusive).

	    Returns:
	        list: ``inertia_`` of the fitted model for K = 1, 2, ..., ``max_k``.
	    """
	    return [
	        KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
	        .fit(feature_frame)
	        .inertia_
	        for n_clusters in range(1, max_k + 1)
	    ]


	wcss = compute_wcss(features)

	# Analyze clusters: per-cluster mean of the two clustering features
	summary_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
	grouped_by_cluster = data.groupby('Cluster')
	cluster_summary = grouped_by_cluster[summary_columns].mean()

	def interpret_cluster(
	    income: float,
	    spending: float,
	    *,
	    high_income: float = 70,
	    low_income: float = 40,
	    premium_spending: float = 50,
	    high_spending: float = 60,
	    low_spending: float = 40,
	) -> str:
	    """Map a cluster's mean income and spending score to a segment label.

	    The rules are checked in order and the first match wins; anything that
	    matches none of them is labelled as a standard customer. All comparisons
	    are inclusive. The defaults reproduce the previously hard-coded cut-offs.

	    Args:
	        income: mean annual income of the cluster (same unit as the
	            thresholds).
	        spending: mean spending score of the cluster.
	        high_income: minimum income counted as "high income".
	        low_income: maximum income counted as "low income".
	        premium_spending: minimum spending for the premium segment
	            (high income).
	        high_spending: minimum spending for the potential-risk segment
	            (low income).
	        low_spending: maximum spending counted as "low spending".

	    Returns:
	        The human-readable segment label.
	    """
	    is_high_income = income >= high_income
	    is_low_income = income <= low_income

	    if is_high_income and spending >= premium_spending:
	        return "💎 Premium Customers (High Income, High Spending)"
	    if is_low_income and spending >= high_spending:
	        return "🔔 Potential Risk Customers (Low Income, High Spending)"
	    if is_high_income and spending <= low_spending:
	        return "💼 Careful Spenders (High Income, Low Spending)"
	    if is_low_income and spending <= low_spending:
	        return "🛒 Budget Customers (Low Income, Low Spending)"
	    return "🧩 Standard Customers"

	# Create Tabs
	tab1, tab2, tab3, tab4 = st.tabs(["📄 Raw Dataset", "📈 Elbow Method", "🎯 Clustered Customers", "📝 Cluster Explanations"])

	with tab1:
	    st.subheader("🧹 Raw Dataset")
	    # `data` has already been given a derived 'Cluster' column by this point;
	    # drop it for display so this tab shows the dataset as it was loaded.
	    # errors='ignore' keeps this working even if the column is absent.
	    raw_preview = data.drop(columns=['Cluster'], errors='ignore').head()
	    st.dataframe(raw_preview)

	with tab2:
	    st.subheader("📈 Elbow Method (to find optimal K)")
	    fig, ax = plt.subplots()
	    # Derive the x-axis from wcss itself so both sequences always have the
	    # same length (K starts at 1)
	    ax.plot(range(1, len(wcss) + 1), wcss, marker='o')
	    ax.set_xlabel('Number of Clusters (K)')
	    ax.set_ylabel('WCSS (Within Cluster Sum of Squares)')
	    ax.set_title('The Elbow Method')
	    st.pyplot(fig)
	    # pyplot keeps every figure it creates registered until it is closed;
	    # without this, each Streamlit rerun would leave another figure open.
	    plt.close(fig)

	with tab3:
	    st.subheader("🎯 Clustered Customers")
	    fig2, ax2 = plt.subplots()
	    # One colour per cluster
	    palette = sns.color_palette("bright", k)
	    sns.scatterplot(
	        x='Annual Income (k$)',
	        y='Spending Score (1-100)',
	        hue='Cluster',
	        palette=palette,
	        data=data,
	        ax=ax2,
	        s=100,
	    )
	    # Overlay the fitted centroids; columns 0 and 1 follow the feature order
	    # used for fitting (income, spending score)
	    ax2.scatter(
	        kmeans.cluster_centers_[:, 0],
	        kmeans.cluster_centers_[:, 1],
	        s=300,
	        c='black',
	        marker='X',
	        label='Centroids',
	    )
	    ax2.legend()
	    ax2.set_title('Customer Segments')
	    st.pyplot(fig2)
	    # pyplot keeps every figure it creates registered until it is closed;
	    # without this, each Streamlit rerun would leave another figure open.
	    plt.close(fig2)

	with tab4:
	    st.subheader("📝 Cluster Explanations")
	    # One line per cluster: its id plus the segment label derived from the
	    # cluster's mean income and mean spending score
	    for cluster_num, means in cluster_summary.iterrows():
	        explanation = interpret_cluster(
	            means['Annual Income (k$)'],
	            means['Spending Score (1-100)'],
	        )
	        st.markdown(f"Cluster {cluster_num}: {explanation}")
	    # Summary table with the largest value of each column highlighted
	    highlighted_summary = cluster_summary.style.highlight_max(axis=0)
	    st.dataframe(highlighted_summary)

	# Footer: a horizontal rule followed by a short credit line
	st.markdown("---")
	st.caption("Made with ❤️ using Streamlit")