add introduction
Files changed:
- .gitignore +2 -0
- .idea/workspace.xml +0 -64
- pages/Anchors.py +2 -1
- pages/LIME.py +2 -1
- pages/SHAP.py +15 -14
- src/prompt_config.py +40 -0
.gitignore
CHANGED
@@ -1,2 +1,4 @@
 .DS_Store
 venv/
+.idea/*
+.idea/
.idea/workspace.xml
DELETED
@@ -1,64 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ChangeListManager">
-    <list default="true" id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="">
-      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/pages/Anchors.py" beforeDir="false" afterPath="$PROJECT_DIR$/pages/Anchors.py" afterDir="false" />
-    </list>
-    <option name="SHOW_DIALOG" value="false" />
-    <option name="HIGHLIGHT_CONFLICTS" value="true" />
-    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
-    <option name="LAST_RESOLUTION" value="IGNORE" />
-  </component>
-  <component name="FileTemplateManagerImpl">
-    <option name="RECENT_TEMPLATES">
-      <list>
-        <option value="Python Script" />
-      </list>
-    </option>
-  </component>
-  <component name="Git.Settings">
-    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
-  </component>
-  <component name="ProjectId" id="2tXP93HJyXRzZGrYXYfz8Dq3x4q" />
-  <component name="ProjectLevelVcsManager">
-    <ConfirmationsSetting value="2" id="Add" />
-  </component>
-  <component name="ProjectViewState">
-    <option name="autoscrollToSource" value="true" />
-    <option name="hideEmptyMiddlePackages" value="true" />
-    <option name="showExcludedFiles" value="true" />
-    <option name="showLibraryContents" value="true" />
-  </component>
-  <component name="PropertiesComponent">
-    <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
-    <property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
-    <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
-    <property name="last_opened_file_path" value="$PROJECT_DIR$/pages" />
-    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
-  </component>
-  <component name="RecentsManager">
-    <key name="CopyFile.RECENT_KEYS">
-      <recent name="$PROJECT_DIR$/pages" />
-    </key>
-  </component>
-  <component name="SvnConfiguration">
-    <configuration />
-  </component>
-  <component name="TaskManager">
-    <task active="true" id="Default" summary="Default task">
-      <changelist id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="" />
-      <created>1740495290809</created>
-      <option name="number" value="Default" />
-      <option name="presentableId" value="Default" />
-      <updated>1740495290809</updated>
-    </task>
-    <servers />
-  </component>
-  <component name="WindowStateProjectService">
-    <state x="367" y="-1084" key="SettingsEditor" timestamp="1740495774369">
-      <screen x="-422" y="-1440" width="2560" height="1440" />
-    </state>
-    <state x="367" y="-1084" key="SettingsEditor/0.25.1440.875/[email protected]" timestamp="1740495774369" />
-  </component>
-</project>
pages/Anchors.py
CHANGED
@@ -127,7 +127,8 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("Anchors")
+    st.write(prompt_params.ANCHORS_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
         explain_example(anchors_threshold, example_idx)
pages/LIME.py
CHANGED
@@ -122,7 +122,8 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("LIME: Local Interpretable Model-agnostic Explanations")
+    st.write(prompt_params.LIME_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
         explain_example(lime_kernel_width, example_idx)
pages/SHAP.py
CHANGED
@@ -31,9 +31,7 @@ def train_model():
 
     print("XGBoost Model training completed!")
 
-
-
-def explain_example(kernel_width, example_idx):
+def explain_example(baseline_number, example_idx):
    """ Explain a given sample without retraining the model. """
     global global_model, X_train, X_test, y_train, y_test
 
@@ -41,19 +39,21 @@ def explain_example(kernel_width, example_idx):
         train_model()
 
     X, y = shap.datasets.adult()
-
-    explainer = shap.TreeExplainer(global_model,
+    X_base = shap.utils.sample(X, baseline_number)
+    explainer = shap.TreeExplainer(global_model, X_base)  # Use the TreeExplainer algorithm with background distribution
     shap_values = explainer.shap_values(X_test)  # Get shap values
     shap_values_exp = explainer(X_test)  # Get explainer for X_test
 
     # SHAP Summary Plot (BeeSwarm)
     st.write("### 📊 SHAP Summary Plot")
+    st.write("This plot provides an intuitive way to see how different features contribute to individual predictions, making model interpretations easier!")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.summary_plot(shap_values, X_test, show=False)
     st.pyplot(fig)
 
     # SHAP Summary Bar Plot
     st.write("### 📊 SHAP Feature Importance (Bar Plot)")
+    st.write("It helps understand which features the model relies on most.")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
     st.pyplot(fig)
@@ -66,6 +66,7 @@ def explain_example(kernel_width, example_idx):
 
     # SHAP Waterfall Plot
     st.write(f"### 🌊 SHAP Waterfall Plot for Example {example_idx}")
+    st.write(f"Visualize the SHAP values for an instance of interest")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.plots.waterfall(shap_values_exp[example_idx], show=False)
     st.pyplot(fig)
@@ -79,13 +80,12 @@ def main():
     train_model()
 
     # Streamlit UI Controls
-
-        label="
-        min_value=
-        max_value=
-        value=
-        step=
-        help=prompt_params.LIME_KERNEL_WIDTH_HELP,
+    baseline_number = st.sidebar.number_input(
+        label="Select the number of baseline:",
+        min_value=20,
+        max_value=1000,
+        value=100,  # Default value
+        step=1
     )
 
     example_idx = st.sidebar.number_input(
@@ -96,10 +96,11 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("SHAP")
+    st.write(prompt_params.SHAP_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
-        explain_example(
+        explain_example(baseline_number, example_idx)
 
 
 if __name__ == '__main__':
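The functional change above is that the explainer is now built against a sampled background (baseline) dataset instead of the model alone. A minimal sketch of that pattern outside the Streamlit app, assuming the same Adult dataset and an XGBoost classifier (the model settings and the baseline size of 100 are illustrative, taken only from the new sidebar defaults, not from the app's training code):

```python
import shap
import xgboost

# Stand-ins for the app's global_model and data; settings are assumptions.
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier(n_estimators=100).fit(X, y)

baseline_number = 100                              # mirrors the new sidebar control (20-1000, default 100)
X_base = shap.utils.sample(X, baseline_number)     # background sample the expectations are computed over

explainer = shap.TreeExplainer(model, X_base)      # attributions are measured relative to X_base
shap_values = explainer.shap_values(X.iloc[:50])   # per-feature contributions for a few rows
```

Passing a background set makes the attributions interventional: each feature's contribution is measured against the distribution in `X_base` rather than against the tree's internal cover statistics, which is why the choice of baseline size is exposed as a UI control.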
src/prompt_config.py
CHANGED
@@ -3,7 +3,20 @@ This application provides explainability for machine learning models using LIME
 It allows users to explore how different features influence model predictions by selecting
 specific samples and visualizing their explanations interactively.
 """
+LIME_INTRODUCTION = """
+LIME (Local Interpretable Model-agnostic Explanations) is a technique used to interpret the predictions of black-box machine learning models.
+It provides local interpretability by approximating the decision boundary of the model in the vicinity of a specific instance with a simpler, interpretable model.
+
+The working principle of LIME includes the following steps:
+1. Select the instance to explain.
+2. Generate perturbed samples by randomly altering feature values to create similar data points.
+3. Obtain model predictions for these perturbed samples using the original black-box model.
+4. Compute similarity weights by assigning higher weights to samples more similar to the original instance.
+5. Train a local interpretable model (usually a weighted linear regression model).
+6. Interpret the results by analyzing the coefficients of the local model to understand feature contributions to the prediction.
+
+By using LIME, complex models become more transparent, enhancing their trustworthiness and interpretability.
+"""
 
-
 LIME_KERNEL_WIDTH_HELP = """
 The `kernel_width` parameter in LIME controls the size of the neighborhood used to generate perturbations around a sample
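Since the new LIME_INTRODUCTION walks through the perturb / weight / fit-a-local-model loop, a small hedged sketch of that loop with the `lime` package may help. The dataset, model, and `kernel_width` value below are assumptions for illustration, not the exact code in pages/LIME.py:

```python
import shap  # used here only for the Adult dataset the Space works with
from lime.lime_tabular import LimeTabularExplainer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = shap.datasets.adult()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)

explainer = LimeTabularExplainer(
    X_train.values,
    feature_names=list(X_train.columns),
    class_names=["<=50K", ">50K"],
    kernel_width=3.0,          # neighborhood size; see LIME_KERNEL_WIDTH_HELP
    mode="classification",
)

# Steps 2-6: perturb around one instance, weight samples by similarity,
# fit a weighted local linear model, and read off its coefficients.
exp = explainer.explain_instance(X_test.values[100], model.predict_proba, num_features=5)
print(exp.as_list())           # (feature condition, local weight) pairs
```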
@@ -27,6 +40,22 @@ Select the index of the example you want to explain.
 e.g., Example 100 is higher than 50K
 """
 
+ANCHORS_INTRODUCTION = """
+Anchors provide model-agnostic interpretations similar to LIME but aim to generate highly **precise and human-interpretable rules** that sufficiently explain the model’s prediction for a given instance.
+
+The process of Anchors includes the following steps:
+1. Select the instance to explain.
+2. Generate perturbed samples but modify only **non-anchor** features while keeping others fixed.
+3. Get model predictions for these perturbed samples.
+4. Find **stable anchor conditions** that ensure the perturbed samples consistently receive the same prediction as the original instance.
+5. Trade-off between precision and coverage:
+   - **Precision**: The fraction of perturbed samples that match the original prediction under the anchor rule.
+   - **Coverage**: The fraction of all possible perturbations that the anchor rule applies to.
+6. Final explanation is presented as an **if-then rule**, making it highly interpretable.
+
+By using Anchors, interpretability improves by generating highly stable and human-readable explanations, making them particularly useful for high-stakes applications.
+"""
+
 ANCHORS_THRESHOLD_HELP = """
 The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
 
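ANCHORS_INTRODUCTION describes the rule search in prose; a minimal sketch using alibi's `AnchorTabular` shows the same precision/coverage trade-off in code. This is one common implementation, not necessarily the library pages/Anchors.py imports, and the dataset, model, and 0.95 threshold are chosen purely for illustration:

```python
from alibi.explainers import AnchorTabular
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=0)
model = RandomForestClassifier(random_state=0).fit(X_train, y_train)

explainer = AnchorTabular(model.predict, feature_names=list(data.feature_names))
explainer.fit(X_train)                                      # discretizes features for perturbation
explanation = explainer.explain(X_test[0], threshold=0.95)  # precision target (the slider value)

print("Anchor rule:", " AND ".join(explanation.anchor))     # human-readable if-then rule
print("Precision:", explanation.precision)                  # fraction matching the original prediction
print("Coverage:", explanation.coverage)                    # fraction of the perturbation space covered
```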
@@ -38,3 +67,14 @@ The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
 
 Choosing an appropriate threshold balances **rule reliability** and **availability**.
 """
+SHAP_INTRODUCTION = """
+SHAP (SHapley Additive exPlanations) provides model-agnostic interpretations to fairly distribute the contribution of each feature to a model's prediction.
+
+The process of SHAP includes the following steps:
+1. **Train a Model**: Fit a machine learning model (e.g., XGBoost) on a dataset.
+2. **Select Baseline Data**: Choose a reference point (average prediction) to compute feature contributions.
+3. **Compute SHAP Values**: Quantify the contribution of each feature using a weighted average over all possible feature subsets.
+4. **Ensure Additivity**: The sum of SHAP values should match the model's prediction difference from the baseline.
+
+By using SHAP, **interpretability** improves by generating stable and mathematically sound explanations, making models more transparent and trustworthy.
+"""
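Point 4 of SHAP_INTRODUCTION (additivity) is easy to verify numerically. A hedged sketch, assuming an XGBoost classifier on the same Adult data rather than the app's exact model: the base value plus the per-feature SHAP values for one row should reproduce the model's raw margin (log-odds) output for that row.

```python
import numpy as np
import shap
import xgboost

# Illustrative model/data, not the app's trained global_model.
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier(n_estimators=100).fit(X, y)

explainer = shap.TreeExplainer(model)
sv = explainer(X.iloc[:1])                                       # Explanation for one row

reconstructed = sv.base_values[0] + sv.values[0].sum()           # baseline + feature contributions
raw_margin = model.predict(X.iloc[:1], output_margin=True)[0]    # model's raw log-odds output
print(np.isclose(reconstructed, raw_margin, atol=1e-3))          # additivity check, expect True
```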