chiichann committed on
Commit
f4f0c0c
·
verified ·
1 Parent(s): 5e451cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -44
app.py CHANGED
@@ -9,34 +9,39 @@ import seaborn as sns
9
  # App title
10
  st.title("🛍️ Customer Segmentation Tool")
11
 
12
- # About the App section with a toggle button
13
- st.sidebar.header("About the App")
14
- with st.sidebar.expander("Learn more about the app"):
 
 
15
  st.write("""
16
  This app uses unsupervised learning techniques to segment customers based on their purchasing behavior.
17
  The dataset is preloaded from an Excel file containing online retail data.
18
 
19
  ### How It Works:
20
- 1. **Load customer transaction data**:
21
- - The app loads customer transaction data, including key details like `Quantity`, `UnitPrice`, and `CustomerID`.
22
-
23
- 2. **Process the data**:
24
- - The data is processed by calculating the total amount spent (`TotalSpent`) for each customer. This is done by multiplying `Quantity` and `UnitPrice`.
25
- - The information is then aggregated by `CustomerID` to summarize the total amount spent, the number of unique transactions, and the total quantity purchased by each customer.
26
-
27
- 3. **Apply K-Means Clustering**:
28
- - The app applies K-Means clustering to segment the customers into distinct groups based on their purchasing behavior, using the processed data.
29
-
30
- 4. **Visualize the customer segments**:
31
- - A scatter plot is created to visualize the customer segments, where customers are grouped based on the `TotalSpent` and the number of transactions (`NumTransactions`), with each cluster represented by different colors.
32
  """)
33
 
34
  # Load dataset
35
- file_path = "Online Retail.xlsx" # Updated file path
36
- df = pd.read_excel(file_path, sheet_name="Online Retail")
 
 
 
 
 
 
 
 
 
37
 
38
- st.write("### Dataset Overview")
39
- st.write(df.head())
 
 
40
 
41
  # Preprocess data
42
  df = df.dropna(subset=["CustomerID"]) # Remove rows without CustomerID
@@ -53,32 +58,34 @@ customer_data = df.groupby("CustomerID").agg({
53
  scaler = StandardScaler()
54
  customer_scaled = pd.DataFrame(scaler.fit_transform(customer_data), columns=customer_data.columns, index=customer_data.index)
55
 
56
- # User selects the number of clusters
57
- num_clusters = st.slider("Select Number of Clusters", min_value=2, max_value=10, value=3)
 
 
58
 
59
- # Apply K-Means clustering
60
- model = KMeans(n_clusters=num_clusters, random_state=42)
61
- customer_data["Cluster"] = model.fit_predict(customer_scaled)
62
 
63
- # Visualize the clusters
64
- st.write("### Clusters Visualization")
65
- fig, ax = plt.subplots()
66
- scatter = ax.scatter(customer_data["TotalSpent"], customer_data["NumTransactions"], c=customer_data["Cluster"], cmap='viridis')
67
- ax.set_xlabel("Total Spent")
68
- ax.set_ylabel("Number of Transactions")
69
- ax.set_title("Customer Segments")
70
- plt.colorbar(scatter, label="Cluster")
71
- st.pyplot(fig)
72
 
73
- # Show the segmented customer data
74
- st.write("### Customer Segments Data")
75
- st.write(customer_data.head())
76
 
77
- # Option to download the segmented data
78
- csv = customer_data.to_csv(index=True)
79
- st.download_button(
80
- label="Download Segmented Customer Data",
81
- data=csv,
82
- file_name="segmented_customer_data.csv",
83
- mime="text/csv"
84
- )
 
9
  # App title
10
  st.title("🛍️ Customer Segmentation Tool")
11
 
12
+ # 🎯 Streamlit Tabs
13
+ tab1, tab2, tab3 = st.tabs(["📖 About", "📊 Dataset Overview", "🧑‍🤝‍🧑 Customer Segmentation"])
14
+
15
+ # About Tab
16
+ with tab1:
17
  st.write("""
18
  This app uses unsupervised learning techniques to segment customers based on their purchasing behavior.
19
  The dataset is preloaded from an Excel file containing online retail data.
20
 
21
  ### How It Works:
22
+ - **Step 1**: Load customer transaction data, including details like `Quantity`, `UnitPrice`, and `CustomerID`.
23
+ - **Step 2**: Process the data by calculating the total spent and aggregating the information by customer.
24
+ - **Step 3**: Apply **K-Means Clustering** to segment the customers into distinct groups.
25
+ - **Step 4**: Visualize the customer segments with a scatter plot, and optionally download the segmented data.
 
 
 
 
 
 
 
 
26
  """)
27
 
28
  # Load dataset
29
+ file_path = "Online Retail.xlsx"
30
+
31
+ # Dataset Tab
32
+ with tab2:
33
+ try:
34
+ df = pd.read_excel(file_path, sheet_name="Online Retail")
35
+ st.write("### Dataset Overview")
36
+ st.write(df.head())
37
+ except Exception as e:
38
+ st.error(f"Error loading dataset: {e}")
39
+ st.stop()
40
 
41
+ # Verify the dataset columns
42
+ if not all(col in df.columns for col in ["CustomerID", "Quantity", "UnitPrice"]):
43
+ st.error("The dataset is missing required columns: 'CustomerID', 'Quantity', 'UnitPrice'. Please check the data.")
44
+ st.stop()
45
 
46
  # Preprocess data
47
  df = df.dropna(subset=["CustomerID"]) # Remove rows without CustomerID
 
58
  scaler = StandardScaler()
59
  customer_scaled = pd.DataFrame(scaler.fit_transform(customer_data), columns=customer_data.columns, index=customer_data.index)
60
 
61
+ # Customer Segmentation Tab
62
+ with tab3:
63
+ # User selects the number of clusters
64
+ num_clusters = st.slider("Select Number of Clusters", min_value=2, max_value=10, value=3)
65
 
66
+ # Apply K-Means clustering
67
+ model = KMeans(n_clusters=num_clusters, random_state=42)
68
+ customer_data["Cluster"] = model.fit_predict(customer_scaled)
69
 
70
+ # Visualize the clusters
71
+ st.write("### Clusters Visualization")
72
+ fig, ax = plt.subplots()
73
+ scatter = ax.scatter(customer_data["TotalSpent"], customer_data["NumTransactions"], c=customer_data["Cluster"], cmap='viridis')
74
+ ax.set_xlabel("Total Spent")
75
+ ax.set_ylabel("Number of Transactions")
76
+ ax.set_title("Customer Segments")
77
+ plt.colorbar(scatter, label="Cluster")
78
+ st.pyplot(fig)
79
 
80
+ # Show the segmented customer data
81
+ st.write("### Customer Segments Data")
82
+ st.write(customer_data.head())
83
 
84
+ # Option to download the segmented data
85
+ csv = customer_data.to_csv(index=True)
86
+ st.download_button(
87
+ label="Download Segmented Customer Data",
88
+ data=csv,
89
+ file_name="segmented_customer_data.csv",
90
+ mime="text/csv"
91
+ )