peggy30 committed
Commit 5963f5d · 1 Parent(s): f6d6f0e

add introduction
Files changed (6):
  1. .gitignore +2 -0
  2. .idea/workspace.xml +0 -64
  3. pages/Anchors.py +2 -1
  4. pages/LIME.py +2 -1
  5. pages/SHAP.py +15 -14
  6. src/prompt_config.py +40 -0
.gitignore CHANGED
@@ -1,2 +1,4 @@
 .DS_Store
 venv/
+.idea/*
+.idea/
.idea/workspace.xml DELETED
@@ -1,64 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ChangeListManager">
-    <list default="true" id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="">
-      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/pages/Anchors.py" beforeDir="false" afterPath="$PROJECT_DIR$/pages/Anchors.py" afterDir="false" />
-    </list>
-    <option name="SHOW_DIALOG" value="false" />
-    <option name="HIGHLIGHT_CONFLICTS" value="true" />
-    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
-    <option name="LAST_RESOLUTION" value="IGNORE" />
-  </component>
-  <component name="FileTemplateManagerImpl">
-    <option name="RECENT_TEMPLATES">
-      <list>
-        <option value="Python Script" />
-      </list>
-    </option>
-  </component>
-  <component name="Git.Settings">
-    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
-  </component>
-  <component name="ProjectId" id="2tXP93HJyXRzZGrYXYfz8Dq3x4q" />
-  <component name="ProjectLevelVcsManager">
-    <ConfirmationsSetting value="2" id="Add" />
-  </component>
-  <component name="ProjectViewState">
-    <option name="autoscrollToSource" value="true" />
-    <option name="hideEmptyMiddlePackages" value="true" />
-    <option name="showExcludedFiles" value="true" />
-    <option name="showLibraryContents" value="true" />
-  </component>
-  <component name="PropertiesComponent">
-    <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
-    <property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
-    <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
-    <property name="last_opened_file_path" value="$PROJECT_DIR$/pages" />
-    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
-  </component>
-  <component name="RecentsManager">
-    <key name="CopyFile.RECENT_KEYS">
-      <recent name="$PROJECT_DIR$/pages" />
-    </key>
-  </component>
-  <component name="SvnConfiguration">
-    <configuration />
-  </component>
-  <component name="TaskManager">
-    <task active="true" id="Default" summary="Default task">
-      <changelist id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="" />
-      <created>1740495290809</created>
-      <option name="number" value="Default" />
-      <option name="presentableId" value="Default" />
-      <updated>1740495290809</updated>
-    </task>
-    <servers />
-  </component>
-  <component name="WindowStateProjectService">
-    <state x="367" y="-1084" key="SettingsEditor" timestamp="1740495774369">
-      <screen x="-422" y="-1440" width="2560" height="1440" />
-    </state>
-    <state x="367" y="-1084" key="SettingsEditor/0.25.1440.875/[email protected]" timestamp="1740495774369" />
-  </component>
-</project>
pages/Anchors.py CHANGED
@@ -127,7 +127,8 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("Anchors")
+    st.write(prompt_params.ANCHORS_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
         explain_example(anchors_threshold, example_idx)
pages/LIME.py CHANGED
@@ -122,7 +122,8 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("LIME: Local Interpretable Model-agnostic Explanations")
+    st.write(prompt_params.LIME_INTRODUCTION)
     # Explain the selected sample
     if st.button("Explain Sample"):
         explain_example(lime_kernel_width, example_idx)
pages/SHAP.py CHANGED
@@ -31,9 +31,7 @@ def train_model():
 
     print("XGBoost Model training completed!")
 
-
-
-def explain_example(kernel_width, example_idx):
+def explain_example(baseline_number, example_idx):
     """ Explain a given sample without retraining the model. """
     global global_model, X_train, X_test, y_train, y_test
 
@@ -41,19 +39,21 @@ def explain_example(kernel_width, example_idx):
         train_model()
 
     X, y = shap.datasets.adult()
-    X100 = shap.utils.sample(X, 100)
-    explainer = shap.TreeExplainer(global_model, X100)  # Use the TreeExplainer algorithm with a background distribution
+    X_base = shap.utils.sample(X, baseline_number)
+    explainer = shap.TreeExplainer(global_model, X_base)  # Use the TreeExplainer algorithm with a background distribution
     shap_values = explainer.shap_values(X_test)  # Get SHAP values
     shap_values_exp = explainer(X_test)  # Get explanations for X_test
 
     # SHAP Summary Plot (BeeSwarm)
     st.write("### 📊 SHAP Summary Plot")
+    st.write("This plot provides an intuitive way to see how different features contribute to individual predictions, making model interpretation easier!")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.summary_plot(shap_values, X_test, show=False)
     st.pyplot(fig)
 
     # SHAP Summary Bar Plot
     st.write("### 📊 SHAP Feature Importance (Bar Plot)")
+    st.write("It shows which features the model relies on most.")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
     st.pyplot(fig)
@@ -66,6 +66,7 @@ def explain_example(kernel_width, example_idx):
 
     # SHAP Waterfall Plot
     st.write(f"### 🌊 SHAP Waterfall Plot for Example {example_idx}")
+    st.write("Visualize the SHAP values for the instance of interest.")
     fig, ax = plt.subplots(figsize=(10, 5))
     shap.plots.waterfall(shap_values_exp[example_idx], show=False)
     st.pyplot(fig)
@@ -79,13 +80,12 @@ def main():
         train_model()
 
     # Streamlit UI Controls
-    lime_kernel_width = st.sidebar.slider(
-        label="Set the `kernel` value:",
-        min_value=0.0,
-        max_value=100.0,
-        value=3.0,  # Default value
-        step=0.1,  # Step size
-        help=prompt_params.LIME_KERNEL_WIDTH_HELP,
+    baseline_number = st.sidebar.number_input(
+        label="Select the number of baseline samples:",
+        min_value=20,
+        max_value=1000,
+        value=100,  # Default value
+        step=1,  # Step size
     )
 
     example_idx = st.sidebar.number_input(
@@ -96,10 +96,11 @@ def main():
         step=1,  # Step size
         help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
     )
-
+    st.title("SHAP")
+    st.write(prompt_params.SHAP_INTRODUCTION)
    # Explain the selected sample
     if st.button("Explain Sample"):
-        explain_example(lime_kernel_width, example_idx)
+        explain_example(baseline_number, example_idx)
 
 
 if __name__ == '__main__':
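
The substantive change in this file swaps a hard-coded 100-sample background set for the user-selected `baseline_number`. A minimal standalone sketch of what that background set controls (the model and variable names are illustrative assumptions, not the app's globals):

import shap
import xgboost

# Same Adult census data the app loads
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier().fit(X, y.astype(int))

# The background (baseline) set defines the reference distribution that
# attributions are measured against; more samples approximate the data
# distribution more faithfully but make each explanation slower.
for n_background in (20, 100, 1000):
    X_base = shap.utils.sample(X, n_background, random_state=0)
    explainer = shap.TreeExplainer(model, X_base)
    shap_values = explainer.shap_values(X.iloc[:5])
    print(n_background, shap_values[0].round(3))  # attributions for the first row

That fidelity/speed trade-off presumably motivates the 20-1000 range on the new sidebar control.
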
src/prompt_config.py CHANGED
@@ -3,7 +3,20 @@ This application provides explainability for machine learning models using LIME
 It allows users to explore how different features influence model predictions by selecting
 specific samples and visualizing their explanations interactively.
 """
-
+LIME_INTRODUCTION = """
+LIME (Local Interpretable Model-agnostic Explanations) is a technique used to interpret the predictions of black-box machine learning models.
+It provides local interpretability by approximating the decision boundary of the model in the vicinity of a specific instance with a simpler, interpretable model.
+
+The working principle of LIME includes the following steps:
+1. Select the instance to explain.
+2. Generate perturbed samples by randomly altering feature values to create similar data points.
+3. Obtain model predictions for these perturbed samples using the original black-box model.
+4. Compute similarity weights by assigning higher weights to samples more similar to the original instance.
+5. Train a local interpretable model (usually a weighted linear regression model).
+6. Interpret the results by analyzing the coefficients of the local model to understand feature contributions to the prediction.
+
+By using LIME, complex models become more transparent, enhancing their trustworthiness and interpretability.
+"""
 
 LIME_KERNEL_WIDTH_HELP = """
 The `kernel_width` parameter in LIME controls the size of the neighborhood used to generate perturbations around a sample
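
To make the six steps above concrete, a minimal sketch using the `lime` package on the same Adult census data the app loads; the XGBoost model and every variable name here are illustrative assumptions, not code from this repo:

import shap
import xgboost
from lime.lime_tabular import LimeTabularExplainer

# Step 1: data, a black-box model, and an instance to explain
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier().fit(X, y.astype(int))

# Steps 2-5 happen inside explain_instance: perturb the row, query the
# model, weight samples by similarity (kernel_width, as documented in
# LIME_KERNEL_WIDTH_HELP), and fit a weighted linear surrogate model.
explainer = LimeTabularExplainer(
    X.values,
    feature_names=list(X.columns),
    class_names=["<=50K", ">50K"],
    mode="classification",
    kernel_width=3.0,
)
exp = explainer.explain_instance(X.values[0], model.predict_proba, num_features=5)

# Step 6: coefficients of the local surrogate model
print(exp.as_list())
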
@@ -27,6 +40,22 @@ Select the index of the example you want to explain.
 e.g., Example 100 is higher than 50K
 """
 
+ANCHORS_INTRODUCTION = """
+Anchors provide model-agnostic interpretations similar to LIME, but aim to generate highly **precise and human-interpretable rules** that are sufficient to explain the model's prediction for a given instance.
+
+The process of Anchors includes the following steps:
+1. Select the instance to explain.
+2. Generate perturbed samples, but modify only **non-anchor** features while keeping the others fixed.
+3. Get model predictions for these perturbed samples.
+4. Find **stable anchor conditions** that ensure the perturbed samples consistently receive the same prediction as the original instance.
+5. Trade off precision against coverage:
+   - **Precision**: The fraction of perturbed samples that match the original prediction under the anchor rule.
+   - **Coverage**: The fraction of all possible perturbations that the anchor rule applies to.
+6. Present the final explanation as an **if-then rule**, making it highly interpretable.
+
+By using Anchors, interpretability improves through highly stable and human-readable explanations, making them particularly useful for high-stakes applications.
+"""
+
 ANCHORS_THRESHOLD_HELP = """
 The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
 
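And a matching sketch of the if-then rule from step 6, here via the `alibi` library's AnchorTabular; that library choice is an assumption for illustration, since this diff does not show which anchors implementation pages/Anchors.py actually imports. The `threshold` argument is the precision target that ANCHORS_THRESHOLD_HELP describes:

import shap
import xgboost
from alibi.explainers import AnchorTabular

X, y = shap.datasets.adult()
model = xgboost.XGBClassifier().fit(X, y.astype(int))

# Steps 2-4: the explainer perturbs non-anchor features and searches for
# a rule whose precision (agreement with the original prediction) exceeds
# the requested threshold.
explainer = AnchorTabular(model.predict, feature_names=list(X.columns))
explainer.fit(X.values)
explanation = explainer.explain(X.values[0], threshold=0.95)

# Step 6: an if-then rule, plus the precision/coverage trade-off of step 5
print("IF", " AND ".join(explanation.anchor))
print("precision:", explanation.precision, "coverage:", explanation.coverage)
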
@@ -38,3 +67,14 @@ The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
 
 Choosing an appropriate threshold balances **rule reliability** and **availability**.
 """
+SHAP_INTRODUCTION = """
+SHAP (SHapley Additive exPlanations) provides model-agnostic interpretations that fairly distribute the contribution of each feature to a model's prediction.
+
+The process of SHAP includes the following steps:
+1. **Train a Model**: Fit a machine learning model (e.g., XGBoost) on a dataset.
+2. **Select Baseline Data**: Choose a reference point (the average prediction) against which feature contributions are computed.
+3. **Compute SHAP Values**: Quantify the contribution of each feature using a weighted average over all possible feature subsets.
+4. **Ensure Additivity**: The sum of the SHAP values should match the difference between the model's prediction and the baseline.
+
+By using SHAP, **interpretability** improves through stable and mathematically sound explanations, making models more transparent and trustworthy.
+"""