Spaces:

peggy30
/

ExplainableAI

Sleeping

App Files Files Community

peggy30 committed on Feb 25

Commit

e1da86b

1 Parent(s): d4ef106

add anchors

Browse files

Files changed (4) hide show

.idea/workspace.xml +9 -1
pages/Anchors.py +135 -0
pages/LIME.py +2 -3
src/prompt_config.py +14 -1

.idea/workspace.xml CHANGED Viewed

@@ -2,7 +2,10 @@
 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="">
       <change beforePath="$PROJECT_DIR$/pages/LIME.py" beforeDir="false" afterPath="$PROJECT_DIR$/pages/LIME.py" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -33,9 +36,14 @@
     <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
     <property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
     <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
-    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
   </component>
   <component name="SvnConfiguration">
     <configuration />
   </component>

 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="d4d4c856-4e4e-4d5f-b4ca-4c1c8515b14a" name="Default Changelist" comment="">
+      <change afterPath="$PROJECT_DIR$/pages/Anchors.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/pages/LIME.py" beforeDir="false" afterPath="$PROJECT_DIR$/pages/LIME.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/src/prompt_config.py" beforeDir="false" afterPath="$PROJECT_DIR$/src/prompt_config.py" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
     <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
     <property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
     <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
+    <property name="last_opened_file_path" value="$PROJECT_DIR$/pages" />
     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
   </component>
+  <component name="RecentsManager">
+    <key name="CopyFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$/pages" />
+    </key>
+  </component>
   <component name="SvnConfiguration">
     <configuration />
   </component>

pages/Anchors.py ADDED Viewed

	@@ -0,0 +1,135 @@

+# Import Libraries
+import pandas as pd
+import streamlit as st
+import src.prompt_config as prompt_params
+# Models
+import xgboost
+from sklearn.model_selection import train_test_split
+# XAI (Explainability)
+import shap
+import lime
+from anchor import anchor_tabular
+# Global Variables to Store Model & Data
+# These start as None and are filled in by train_model() on its first call.
+# NOTE(review): Streamlit is understood to re-execute page scripts on each
+# interaction, which would reset these to None and retrain -- confirm, and
+# consider a Streamlit caching decorator if so.
+global_model = None
+X_train, X_test, y_train, y_test = None, None, None, None
+def train_model():
+    """ Train the XGBoost model only once and store it globally.
+
+    While ``global_model`` is still ``None``, this loads the dataset returned
+    by ``shap.datasets.adult()``, splits it in half into train and test sets
+    with a fixed ``random_state``, and fits an ``XGBClassifier`` created with
+    default arguments. The split and the fitted model are written to the
+    module-level globals. Calls made while a model is already stored do
+    nothing. Returns nothing.
+    """
+    global global_model, X_train, X_test, y_train, y_test
+    if global_model is None:
+        # Load Data from SHAP library
+        X, y = shap.datasets.adult()
+        # Split data (half for training, half for testing; seeded so the
+        # sample indices shown in the UI refer to the same rows every run)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
+        # Train XGBoost model
+        global_model = xgboost.XGBClassifier()
+        global_model.fit(X_train, y_train)
+        # Goes to the process stdout, not to the Streamlit page.
+        print("XGBoost Model training completed!")
+def define_features():
+    """ Define feature names and categorical mappings.
+
+    Returns a 4-tuple ``(feature_names, categorical_features, class_names,
+    categorical_names)``:
+
+    - ``feature_names``: the 12 column labels, in order.
+    - ``categorical_features``: the names of the columns treated as categorical.
+    - ``class_names``: labels for the two target classes.
+    - ``categorical_names``: dict keyed by the column's position in
+      ``feature_names`` (1 = Workclass, 3 = Marital Status, 4 = Occupation,
+      5 = Relationship, 6 = Race, 7 = Sex, 11 = Country) mapping to the list
+      of category labels for that column.
+    """
+    feature_names = ["Age", "Workclass",
+                     "Education-Num", "Marital Status", "Occupation",
+                     "Relationship", "Race", "Sex", "Capital Gain",
+                     "Capital Loss", "Hours per week", "Country"]
+    # Returned for callers; explain_example() in this file unpacks it but does
+    # not pass it on to the explainer.
+    categorical_features = ["Workclass", "Marital Status", "Occupation", "Relationship", "Race", "Sex", "Country"]
+    class_names = ['<=50K', '>50K']
+    # NOTE(review): the order of labels in each list presumably has to match
+    # the integer codes used in the shap Adult dataset -- verify against the
+    # dataset's encoding before relying on the displayed category names.
+    categorical_names = {
+        1: ['Private', 'Self-emp-not-inc', 'Self-emp-inc', 'Federal-gov', 'Local-gov', 'State-gov', 'Without-pay',
+            'Never-worked'],
+        3: ['Married-civ-spouse', 'Divorced', 'Never-married', 'Separated', 'Widowed', 'Married-spouse-absent',
+            'Married-AF-spouse'],
+        4: ['Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty',
+            'Handlers-cleaners',
+            'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv',
+            'Protective-serv', 'Armed-Forces'],
+        5: ['Wife', 'Own-child', 'Husband', 'Not-in-family', 'Other-relative', 'Unmarried'],
+        6: ['White', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other', 'Black'],
+        7: ['Female', 'Male'],
+        11: ['United-States', 'Cambodia', 'England', 'Puerto-Rico', 'Canada', 'Germany', 'Outlying-US(Guam-USVI-etc)',
+             'India',
+             'Japan', 'Greece', 'South', 'China', 'Cuba', 'Iran', 'Honduras', 'Philippines', 'Italy', 'Poland',
+             'Jamaica', 'Vietnam',
+             'Mexico', 'Portugal', 'Ireland', 'France', 'Dominican-Republic', 'Laos', 'Ecuador', 'Taiwan', 'Haiti',
+             'Columbia', 'Hungary',
+             'Guatemala', 'Nicaragua', 'Scotland', 'Thailand', 'Yugoslavia', 'El-Salvador', 'Trinadad&Tobago', 'Peru',
+             'Hong', 'Holand-Netherlands']
+    }
+    return feature_names, categorical_features, class_names, categorical_names
+def explain_example(anchors_threshold, example_idx):
+    """ Explain a given sample without retraining the model.
+
+    Args:
+        anchors_threshold: value forwarded as ``threshold`` to
+            ``explainer.explain_instance``.
+        example_idx: positional index of the row in ``X_test`` to explain.
+
+    Builds an ``AnchorTabularExplainer`` from the training data, explains the
+    selected test row using ``global_model.predict``, and renders the result
+    with ``st.table``. Returns nothing.
+    """
+    global global_model, X_train, X_test, y_train, y_test
+    # Train lazily if no model has been stored yet.
+    if global_model is None:
+        train_model()
+    feature_names, categorical_features, class_names, categorical_names = define_features()
+    # Initialize Anchors explainer
+    explainer = anchor_tabular.AnchorTabularExplainer(
+        class_names,
+        feature_names,
+        X_train.values,
+        categorical_names)
+    # Explain the selected sample
+    exp = explainer.explain_instance(X_test.values[example_idx], global_model.predict, threshold=anchors_threshold)
+    # One table row per rule name. Precision and coverage are single values for
+    # the whole explanation, so they are repeated on every row; if no rule
+    # names are returned, all three columns are empty.
+    explanation_data = {
+        "Feature Rule": exp.names(),
+        "Precision": [f"{exp.precision():.2f}"] * len(exp.names()),
+        "Coverage": [f"{exp.coverage():.2f}"] * len(exp.names())
+    }
+    df_explanation = pd.DataFrame(explanation_data)
+    st.table(df_explanation)
+def main():
+    """ Render the Anchors page: sidebar controls plus an explain button.
+
+    Makes sure a model is available, then shows a threshold slider and a
+    sample-index input in the sidebar. Clicking "Explain Sample" calls
+    ``explain_example`` with the current values of both controls.
+    """
+    global global_model
+    # Ensure the model is trained only once
+    if global_model is None:
+        train_model()
+    anchors_threshold = st.sidebar.slider(
+        label="Set the `Threshold` value:",
+        min_value=0.00,
+        max_value=1.00,
+        value=0.8,  # Default value
+        step=0.01,  # Step size
+        help=prompt_params.ANCHORS_THRESHOLD_HELP,
+    )
+    # X_test is the module-level test split populated by train_model() above.
+    example_idx = st.sidebar.number_input(
+        label="Select the sample index to explain:",
+        min_value=0,
+        max_value=len(X_test) - 1,  # Ensures the index is within range
+        value=1,  # Default value
+        step=1,  # Step size
+        help=prompt_params.EXAMPLE_BE_EXPLAINED_IDX,
+    )
+    # Explain the selected sample
+    if st.button("Explain Sample"):
+        explain_example(anchors_threshold, example_idx)
+# Build the page only when this file is executed as the main script.
+if __name__ == '__main__':
+    main()

pages/LIME.py CHANGED Viewed

@@ -41,7 +41,6 @@ def train_model():
 def define_features():
     """ Define feature names and categorical mappings. """
     feature_names = ["Age", "Workclass",
                      "Education-Num", "Marital Status", "Occupation",
                      "Relationship", "Race", "Sex", "Capital Gain",
@@ -100,7 +99,7 @@ def explain_example(kernel_width, example_idx):
     explanation_html = exp.as_html()
     # Display explanation in Streamlit
-    components.html(explanation_html, height=600, scrolling=True)
 def main():
@@ -120,7 +119,7 @@ def main():
         help=prompt_params.LIME_KERNEL_WIDTH_HELP,
     )
-    example_idx = st.sidebar.slider(
         label="Select the sample index to explain:",
         min_value=0,
         max_value=len(X_test) - 1,  # Ensures the index is within range

 def define_features():
     """ Define feature names and categorical mappings. """
     feature_names = ["Age", "Workclass",
                      "Education-Num", "Marital Status", "Occupation",
                      "Relationship", "Race", "Sex", "Capital Gain",
     explanation_html = exp.as_html()
     # Display explanation in Streamlit
+    components.html(explanation_html, height=700, scrolling=True)
 def main():
         help=prompt_params.LIME_KERNEL_WIDTH_HELP,
     )
+    example_idx = st.sidebar.number_input(
         label="Select the sample index to explain:",
         min_value=0,
         max_value=len(X_test) - 1,  # Ensures the index is within range

src/prompt_config.py CHANGED Viewed

@@ -24,4 +24,17 @@ for explanation. It determines how far the generated synthetic data points will
 EXAMPLE_BE_EXPLAINED_IDX="""
 Select the index of the example you want to explain.
-"""

 EXAMPLE_BE_EXPLAINED_IDX="""
 Select the index of the example you want to explain.
+e.g., Example 100 is higher than 50K
+"""
+# Help text for the Anchors `Threshold` sidebar slider; pages/Anchors.py passes
+# it as the slider's `help` argument.
+ANCHORS_THRESHOLD_HELP = """
+The `threshold` parameter controls the precision (confidence) level of the Anchor rule.
+- It defines the **minimum confidence** required for an Anchor rule to be considered valid.
+- The typical range is **0.8 to 0.95**:
+  - Lower values (e.g., 0.7) allow more flexible rules but may include some noise.
+  - Higher values (e.g., 0.95) ensure highly reliable rules but make them harder to find.
+- If set to **1.0**, only rules with 100% confidence will be accepted, which may result in no valid rules being found.
+Choosing an appropriate threshold balances **rule reliability** and **availability**.
+"""