Spaces:

chiichann
/

customer_segmentation_tool

Sleeping

App Files Community

chiichann committed on Mar 4

Commit

f4f0c0c

verified ·

1 Parent(s): 5e451cb

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -44

app.py CHANGED Viewed

@@ -9,34 +9,39 @@ import seaborn as sns
 # App title
 st.title("🛍️ Customer Segmentation Tool")
-# About the App section with a toggle button
-st.sidebar.header("About the App")
-with st.sidebar.expander("Learn more about the app"):
     st.write("""
     This app uses unsupervised learning techniques to segment customers based on their purchasing behavior.
     The dataset is preloaded from an Excel file containing online retail data.
     ### How It Works:
-    1. **Load customer transaction data**:
-       - The app loads customer transaction data, including key details like `Quantity`, `UnitPrice`, and `CustomerID`.
-    2. **Process the data**:
-       - The data is processed by calculating the total amount spent (`TotalSpent`) for each customer. This is done by multiplying `Quantity` and `UnitPrice`.
-       - The information is then aggregated by `CustomerID` to summarize the total amount spent, the number of unique transactions, and the total quantity purchased by each customer.
-    3. **Apply K-Means Clustering**:
-       - The app applies K-Means clustering to segment the customers into distinct groups based on their purchasing behavior, using the processed data.
-    4. **Visualize the customer segments**:
-       - A scatter plot is created to visualize the customer segments, where customers are grouped based on the `TotalSpent` and the number of transactions (`NumTransactions`), with each cluster represented by different colors.
     """)
 # Load dataset
-file_path = "Online Retail.xlsx"  # Updated file path
-df = pd.read_excel(file_path, sheet_name="Online Retail")
-st.write("### Dataset Overview")
-st.write(df.head())
 # Preprocess data
 df = df.dropna(subset=["CustomerID"])  # Remove rows without CustomerID
@@ -53,32 +58,34 @@ customer_data = df.groupby("CustomerID").agg({
 scaler = StandardScaler()
 customer_scaled = pd.DataFrame(scaler.fit_transform(customer_data), columns=customer_data.columns, index=customer_data.index)
-# User selects the number of clusters
-num_clusters = st.slider("Select Number of Clusters", min_value=2, max_value=10, value=3)
-# Apply K-Means clustering
-model = KMeans(n_clusters=num_clusters, random_state=42)
-customer_data["Cluster"] = model.fit_predict(customer_scaled)
-# Visualize the clusters
-st.write("### Clusters Visualization")
-fig, ax = plt.subplots()
-scatter = ax.scatter(customer_data["TotalSpent"], customer_data["NumTransactions"], c=customer_data["Cluster"], cmap='viridis')
-ax.set_xlabel("Total Spent")
-ax.set_ylabel("Number of Transactions")
-ax.set_title("Customer Segments")
-plt.colorbar(scatter, label="Cluster")
-st.pyplot(fig)
-# Show the segmented customer data
-st.write("### Customer Segments Data")
-st.write(customer_data.head())
-# Option to download the segmented data
-csv = customer_data.to_csv(index=True)
-st.download_button(
-    label="Download Segmented Customer Data",
-    data=csv,
-    file_name="segmented_customer_data.csv",
-    mime="text/csv"
-)

 # App title
 st.title("🛍️ Customer Segmentation Tool")
+# 🎯 Streamlit Tabs
+tab1, tab2, tab3 = st.tabs(["📖 About", "📊 Dataset Overview", "🧑‍🤝‍🧑 Customer Segmentation"])
+# About Tab
+with tab1:
     st.write("""
     This app uses unsupervised learning techniques to segment customers based on their purchasing behavior.
     The dataset is preloaded from an Excel file containing online retail data.
     ### How It Works:
+    - **Step 1**: Load customer transaction data, including details like `Quantity`, `UnitPrice`, and `CustomerID`.
+    - **Step 2**: Process the data by calculating the total spent and aggregating the information by customer.
+    - **Step 3**: Apply **K-Means Clustering** to segment the customers into distinct groups.
+    - **Step 4**: Visualize the customer segments with a scatter plot, and optionally download the segmented data.
     """)
 # Load dataset
+file_path = "Online Retail.xlsx"
+# Dataset Tab
+with tab2:
+    try:
+        df = pd.read_excel(file_path, sheet_name="Online Retail")
+        st.write("### Dataset Overview")
+        st.write(df.head())
+    except Exception as e:
+        st.error(f"Error loading dataset: {e}")
+        st.stop()
+    # Verify the dataset columns
+    if not all(col in df.columns for col in ["CustomerID", "Quantity", "UnitPrice"]):
+        st.error("The dataset is missing required columns: 'CustomerID', 'Quantity', 'UnitPrice'. Please check the data.")
+        st.stop()
 # Preprocess data
 df = df.dropna(subset=["CustomerID"])  # Remove rows without CustomerID
 scaler = StandardScaler()
 customer_scaled = pd.DataFrame(scaler.fit_transform(customer_data), columns=customer_data.columns, index=customer_data.index)
+# Customer Segmentation Tab
+with tab3:
+    # User selects the number of clusters
+    num_clusters = st.slider("Select Number of Clusters", min_value=2, max_value=10, value=3)
+    # Apply K-Means clustering
+    model = KMeans(n_clusters=num_clusters, random_state=42)
+    customer_data["Cluster"] = model.fit_predict(customer_scaled)
+    # Visualize the clusters
+    st.write("### Clusters Visualization")
+    fig, ax = plt.subplots()
+    scatter = ax.scatter(customer_data["TotalSpent"], customer_data["NumTransactions"], c=customer_data["Cluster"], cmap='viridis')
+    ax.set_xlabel("Total Spent")
+    ax.set_ylabel("Number of Transactions")
+    ax.set_title("Customer Segments")
+    plt.colorbar(scatter, label="Cluster")
+    st.pyplot(fig)
+    # Show the segmented customer data
+    st.write("### Customer Segments Data")
+    st.write(customer_data.head())
+    # Option to download the segmented data
+    csv = customer_data.to_csv(index=True)
+    st.download_button(
+        label="Download Segmented Customer Data",
+        data=csv,
+        file_name="segmented_customer_data.csv",
+        mime="text/csv"
+    )