peggy30 committed on
Commit
e73bf99
·
1 Parent(s): b939871
Files changed (3) hide show
  1. pages/ALE.py +75 -0
  2. pages/ICE_and_PDP.py +1 -0
  3. src/prompt_config.py +13 -1
pages/ALE.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Libraries
2
+ import matplotlib.pyplot as plt
3
+ import streamlit as st
4
+ import src.prompt_config as prompt_params
5
+ # Models
6
+ import xgboost
7
+ from sklearn.model_selection import train_test_split
8
+ from alepython import ale_plot
9
+ # XAI (Explainability)
10
+ import shap
11
+
12
# Module-level cache: the fitted model and the train/test splits,
# populated once by train_model() and reused across calls.
global_model = None
X_train = X_test = y_train = y_test = None
15
+
16
+
17
def train_model():
    """Fit the global XGBoost classifier on the UCI Adult data, once.

    Loads the dataset from the SHAP library, splits it 50/50, trains an
    ``XGBClassifier``, and stores the model and splits in module globals.
    Subsequent calls are no-ops while ``global_model`` is set.
    """
    global global_model, X_train, X_test, y_train, y_test

    # Already trained — nothing to do.
    if global_model is not None:
        return

    # Load data from the SHAP library's bundled Adult census dataset.
    X, y = shap.datasets.adult()

    # Fixed seed so the split (and model) are reproducible across reruns.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

    # Fit the classifier, then publish it via the module-level cache.
    model = xgboost.XGBClassifier()
    model.fit(X_train, y_train)
    global_model = model

    print("XGBoost Model training completed!")
33
+
34
def explain_example():
    """Render 1D and 2D ALE plots for the cached model on the test split.

    Trains the model lazily if it has not been fitted yet, then draws a
    1D main-effect ALE plot for "Age" (with Monte Carlo replicates) and a
    2D second-order ALE plot over the first two feature columns.
    """
    global global_model, X_train, X_test, y_train, y_test

    # Lazily train if the cached model is missing.
    if global_model is None:
        train_model()

    # 1D main-effect ALE for the "Age" feature.
    # NOTE(review): ale_plot manages its own axes internally — confirm it
    # actually draws onto this figure before relying on st.pyplot(fig).
    fig, _ = plt.subplots(figsize=(10, 5))
    st.write("1D Main Effect ALE Plot")
    ale_plot(
        global_model,
        X_test,
        "Age",
        bins=5,
        monte_carlo=True,
        monte_carlo_rep=30,
        monte_carlo_ratio=0.5,
    )
    st.pyplot(fig)

    # 2D second-order ALE over the first two training columns.
    fig, _ = plt.subplots(figsize=(10, 5))
    st.write("2D Second-Order ALE Plot")
    ale_plot(global_model, X_test, X_train.columns[:2], bins=10)
    st.pyplot(fig)
59
+
60
def main():
    """Streamlit entry point for the ALE explainability page."""
    global global_model

    # Train once up front so the button handler responds quickly.
    if global_model is None:
        train_model()

    # Page header and introductory text.
    st.title("ALE (Accumulated Local Effects)")
    st.write(prompt_params.ALE_INTRODUCTION)

    # Produce the ALE plots on demand.
    if st.button("Explain Sample"):
        explain_example()
72
+
73
+
74
# Allow running this page directly as a script.
if __name__ == '__main__':
    main()
pages/ICE_and_PDP.py CHANGED
@@ -52,6 +52,7 @@ def main():
52
  # Define feature names
53
  feature_names = ["Age", "Workclass", "Education-Num", "Marital Status", "Occupation",
54
  "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per week", "Country"]
 
55
 
56
  selected_feature = st.sidebar.selectbox("Select a feature for PDP/ICE analysis:", feature_names)
57
 
 
52
  # Define feature names
53
  feature_names = ["Age", "Workclass", "Education-Num", "Marital Status", "Occupation",
54
  "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per week", "Country"]
55
+ print(X_test.columns) # Check the actual feature names
56
 
57
  selected_feature = st.sidebar.selectbox("Select a feature for PDP/ICE analysis:", feature_names)
58
 
src/prompt_config.py CHANGED
@@ -105,4 +105,16 @@ When `kind` is selected:
105
  - **both**: Displays both ICE and PDP.
106
  - **individual**: Displays only ICE.
107
  - **average**: Displays only PDP.
108
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  - **both**: Displays both ICE and PDP.
106
  - **individual**: Displays only ICE.
107
  - **average**: Displays only PDP.
108
+ """
109
+
110
+ ALE_INTRODUCTION = """
111
+ ALE (Accumulated Local Effects) is an interpretable machine learning technique that quantifies the impact of a feature on model predictions while accounting for feature dependencies.
112
+
113
+ The process of ALE includes the following steps:
114
+ 1. **Bin the Feature**: Divide the feature into intervals (bins) to segment the data.
115
+ 2. **Compute Local Effects**: Measure the change in predictions when the feature moves from the lower to the upper edge of each bin.
116
+ 3. **Accumulate Effects**: Sum the local effects sequentially across bins to observe the overall influence of the feature.
117
+ 4. **Centering**: Normalize the accumulated effects by subtracting the mean to focus on relative deviations from the average prediction.
118
+
119
+ ALE improves **interpretability** by capturing localized effects while mitigating bias from correlated features, making model explanations more reliable.
120
+ """