peggy30 committed
Commit 5963f5d · 1 Parent(s): f6d6f0e

add introduction
Files changed (6):
  1. .gitignore +2 -0
  2. .idea/workspace.xml +0 -64
  3. pages/Anchors.py +2 -1
  4. pages/LIME.py +2 -1
  5. pages/SHAP.py +15 -14
  6. src/prompt_config.py +40 -0
.gitignore CHANGED
@@ -1,2 +1,4 @@
 .DS_Store
 venv/
+.idea/*
+.idea/
.idea/workspace.xml DELETED
@@ -1,64 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ChangeListManager">
-    <list default="true" id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="">
-      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/pages/Anchors.py" beforeDir="false" afterPath="$PROJECT_DIR$/pages/Anchors.py" afterDir="false" />
-    </list>
-    <option name="SHOW_DIALOG" value="false" />
-    <option name="HIGHLIGHT_CONFLICTS" value="true" />
-    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
-    <option name="LAST_RESOLUTION" value="IGNORE" />
-  </component>
-  <component name="FileTemplateManagerImpl">
-    <option name="RECENT_TEMPLATES">
-      <list>
-        <option value="Python Script" />
-      </list>
-    </option>
-  </component>
-  <component name="Git.Settings">
-    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
-  </component>
-  <component name="ProjectId" id="2tXP93HJyXRzZGrYXYfz8Dq3x4q" />
-  <component name="ProjectLevelVcsManager">
-    <ConfirmationsSetting value="2" id="Add" />
-  </component>
-  <component name="ProjectViewState">
-    <option name="autoscrollToSource" value="true" />
-    <option name="hideEmptyMiddlePackages" value="true" />
-    <option name="showExcludedFiles" value="true" />
-    <option name="showLibraryContents" value="true" />
-  </component>
-  <component name="PropertiesComponent">
-    <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
-    <property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
-    <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
-    <property name="last_opened_file_path" value="$PROJECT_DIR$/pages" />
-    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
-  </component>
-  <component name="RecentsManager">
-    <key name="CopyFile.RECENT_KEYS">
-      <recent name="$PROJECT_DIR$/pages" />
-    </key>
-  </component>
-  <component name="SvnConfiguration">
-    <configuration />
-  </component>
-  <component name="TaskManager">
-    <task active="true" id="Default" summary="Default task">
-      <changelist id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="" />
-      <created>1740495290809</created>
-      <option name="number" value="Default" />
-      <option name="presentableId" value="Default" />
-      <updated>1740495290809</updated>
-    </task>
-    <servers />
-  </component>
-  <component name="WindowStateProjectService">
-    <state x="367" y="-1084" key="SettingsEditor" timestamp="1740495774369">
-      <screen x="-422" y="-1440" width="2560" height="1440" />
-    </state>
-    <state x="367" y="-1084" key="SettingsEditor/0.25.1440.875/[email protected]" timestamp="1740495774369" />
-  </component>
-</project>
pages/Anchors.py CHANGED
@@ -127,7 +127,8 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("Anchors")
+    st.write(prompt_params.ANCHORS_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
         explain_example(anchors_threshold, example_idx)
pages/LIME.py CHANGED
@@ -122,7 +122,8 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("LIME: Local Interpretable Model-agnostic Explanations")
+    st.write(prompt_params.LIME_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
         explain_example(lime_kernel_width, example_idx)
pages/SHAP.py CHANGED
@@ -31,9 +31,7 @@ def train_model():
 
     print("XGBoost Model training completed!")
 
-
-
-def explain_example(kernel_width, example_idx):
+def explain_example(baseline_number, example_idx):
     """ Explain a given sample without retraining the model. """
     global global_model, X_train, X_test, y_train, y_test
 
@@ -41,19 +39,21 @@ def explain_example(kernel_width, example_idx):
         train_model()
 
     X, y = shap.datasets.adult()
-    X100 = shap.utils.sample(X, 100)
-    explainer = shap.TreeExplainer(global_model, X100)  # Use the TreeExplainer algorithm with a background distribution
+    X_base = shap.utils.sample(X, baseline_number)
+    explainer = shap.TreeExplainer(global_model, X_base)  # Use the TreeExplainer algorithm with a background distribution
     shap_values = explainer.shap_values(X_test)  # Get SHAP values
     shap_values_exp = explainer(X_test)  # Get explanations for X_test
 
     # SHAP Summary Plot (BeeSwarm)
     st.write("### 📊 SHAP Summary Plot")
+    st.write("This plot provides an intuitive way to see how different features contribute to individual predictions, making model interpretation easier!")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.summary_plot(shap_values, X_test, show=False)
     st.pyplot(fig)
 
     # SHAP Summary Bar Plot
     st.write("### 📊 SHAP Feature Importance (Bar Plot)")
+    st.write("It shows which features the model relies on most.")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
     st.pyplot(fig)
@@ -66,6 +66,7 @@ def explain_example(kernel_width, example_idx):
 
     # SHAP Waterfall Plot
     st.write(f"### 🌊 SHAP Waterfall Plot for Example {example_idx}")
+    st.write("Visualize the SHAP values for the instance of interest.")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.plots.waterfall(shap_values_exp[example_idx], show=False)
     st.pyplot(fig)
@@ -79,13 +80,12 @@ def main():
         train_model()
 
     # Streamlit UI Controls
-    lime_kernel_width = st.sidebar.slider(
-        label="Set the `kernel` value:",
-        min_value=0.0,
-        max_value=100.0,
-        value=3.0,  # Default value
-        step=0.1,  # Step size
-        help=prompt_params.LIME_KERNEL_WIDTH_HELP,
+    baseline_number = st.sidebar.number_input(
+        label="Select the number of baseline samples:",
+        min_value=20,
+        max_value=1000,
+        value=100,  # Default value
+        step=1,  # Step size
     )
 
     example_idx = st.sidebar.number_input(
@@ -96,10 +96,11 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("SHAP")
+    st.write(prompt_params.SHAP_INTRODUCTION)
    # Explain the selected sample
     if st.button("Explain Sample"):
-        explain_example(lime_kernel_width, example_idx)
+        explain_example(baseline_number, example_idx)
 
 
 if __name__ == '__main__':
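
The substantive change in this file swaps a hard-coded 100-sample background set for the user-selected `baseline_number`. A minimal standalone sketch of what that background set controls (the model and variable names are illustrative assumptions, not the app's globals):

import shap
import xgboost

# Same Adult census data the app loads
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier().fit(X, y.astype(int))

# The background (baseline) set defines the reference distribution that
# attributions are measured against; more samples approximate the data
# distribution more faithfully but make each explanation slower.
for n_background in (20, 100, 1000):
    X_base = shap.utils.sample(X, n_background, random_state=0)
    explainer = shap.TreeExplainer(model, X_base)
    shap_values = explainer.shap_values(X.iloc[:5])
    print(n_background, shap_values[0].round(3))  # attributions for the first row

That fidelity/speed trade-off presumably motivates the 20-1000 range on the new sidebar control.
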
src/prompt_config.py CHANGED
@@ -3,7 +3,20 @@ This application provides explainability for machine learning models using LIME
 It allows users to explore how different features influence model predictions by selecting
 specific samples and visualizing their explanations interactively.
 """
-
+LIME_INTRODUCTION = """
+LIME (Local Interpretable Model-agnostic Explanations) is a technique used to interpret the predictions of black-box machine learning models.
+It provides local interpretability by approximating the decision boundary of the model in the vicinity of a specific instance with a simpler, interpretable model.
+
+The working principle of LIME includes the following steps:
+1. Select the instance to explain.
+2. Generate perturbed samples by randomly altering feature values to create similar data points.
+3. Obtain model predictions for these perturbed samples using the original black-box model.
+4. Compute similarity weights by assigning higher weights to samples more similar to the original instance.
+5. Train a local interpretable model (usually a weighted linear regression model).
+6. Interpret the results by analyzing the coefficients of the local model to understand feature contributions to the prediction.
+
+By using LIME, complex models become more transparent, enhancing their trustworthiness and interpretability.
+"""
 
 LIME_KERNEL_WIDTH_HELP = """
 The `kernel_width` parameter in LIME controls the size of the neighborhood used to generate perturbations around a sample
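
To make the six steps above concrete, a minimal sketch using the `lime` package on the same Adult census data the app loads; the XGBoost model and every variable name here are illustrative assumptions, not code from this repo:

import shap
import xgboost
from lime.lime_tabular import LimeTabularExplainer

# Step 1: data, a black-box model, and an instance to explain
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier().fit(X, y.astype(int))

# Steps 2-5 happen inside explain_instance: perturb the row, query the
# model, weight samples by similarity (kernel_width, as documented in
# LIME_KERNEL_WIDTH_HELP), and fit a weighted linear surrogate model.
explainer = LimeTabularExplainer(
    X.values,
    feature_names=list(X.columns),
    class_names=["<=50K", ">50K"],
    mode="classification",
    kernel_width=3.0,
)
exp = explainer.explain_instance(X.values[0], model.predict_proba, num_features=5)

# Step 6: coefficients of the local surrogate model
print(exp.as_list())
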
@@ -27,6 +40,22 @@ Select the index of the example you want to explain.
 e.g., Example 100 is higher than 50K
 """
 
+ANCHORS_INTRODUCTION = """
+Anchors provide model-agnostic interpretations similar to LIME, but aim to generate highly **precise and human-interpretable rules** that are sufficient to explain the model's prediction for a given instance.
+
+The process of Anchors includes the following steps:
+1. Select the instance to explain.
+2. Generate perturbed samples, but modify only **non-anchor** features while keeping the others fixed.
+3. Get model predictions for these perturbed samples.
+4. Find **stable anchor conditions** that ensure the perturbed samples consistently receive the same prediction as the original instance.
+5. Trade off precision against coverage:
+   - **Precision**: The fraction of perturbed samples that match the original prediction under the anchor rule.
+   - **Coverage**: The fraction of all possible perturbations that the anchor rule applies to.
+6. Present the final explanation as an **if-then rule**, making it highly interpretable.
+
+By using Anchors, interpretability improves through highly stable and human-readable explanations, making them particularly useful for high-stakes applications.
+"""
+
 ANCHORS_THRESHOLD_HELP = """
 The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
 
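And a matching sketch of the if-then rule from step 6, here via the `alibi` library's AnchorTabular; that library choice is an assumption for illustration, since this diff does not show which anchors implementation pages/Anchors.py actually imports. The `threshold` argument is the precision target that ANCHORS_THRESHOLD_HELP describes:

import shap
import xgboost
from alibi.explainers import AnchorTabular

X, y = shap.datasets.adult()
model = xgboost.XGBClassifier().fit(X, y.astype(int))

# Steps 2-4: the explainer perturbs non-anchor features and searches for
# a rule whose precision (agreement with the original prediction) exceeds
# the requested threshold.
explainer = AnchorTabular(model.predict, feature_names=list(X.columns))
explainer.fit(X.values)
explanation = explainer.explain(X.values[0], threshold=0.95)

# Step 6: an if-then rule, plus the precision/coverage trade-off of step 5
print("IF", " AND ".join(explanation.anchor))
print("precision:", explanation.precision, "coverage:", explanation.coverage)
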
@@ -38,3 +67,14 @@ The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
 
 Choosing an appropriate threshold balances **rule reliability** and **availability**.
 """
+SHAP_INTRODUCTION = """
+SHAP (SHapley Additive exPlanations) provides model-agnostic interpretations that fairly distribute the contribution of each feature to a model's prediction.
+
+The process of SHAP includes the following steps:
+1. **Train a Model**: Fit a machine learning model (e.g., XGBoost) on a dataset.
+2. **Select Baseline Data**: Choose a reference point (the average prediction) against which feature contributions are computed.
+3. **Compute SHAP Values**: Quantify the contribution of each feature using a weighted average over all possible feature subsets.
+4. **Ensure Additivity**: The sum of the SHAP values should match the difference between the model's prediction and the baseline.
+
+By using SHAP, **interpretability** improves through stable and mathematically sound explanations, making models more transparent and trustworthy.
+"""