Spaces:

Skier8402
/

ViT_timm_interp

Sleeping

App Files Files

xet

Community

Skier8402 commited on 29 days ago

Commit

8af51e2

verified ·

1 Parent(s): 427ab36

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +655 -34

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,661 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
 """
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+'''
+streamlitapp.py — Vision Transformer Interpretability Dashboard (Streamlit app)
+This Streamlit app provides interpretability tools for vision transformer and CNN models.
+Features:
+- LIME explanations for image classification predictions
+- Uncertainty analysis via MC Dropout and Test-Time Augmentation (TTA)
+- Switch between Hugging Face (ViT, Swin, DeiT) and timm (ResNet, EfficientNet, ConvNeXt) models
+- Support for custom finetuned models and class mappings
+- Interactive sidebar for model selection and checkpoint upload
+- Feynman-style explanations and cheat-sheet for interpretability concepts
+Inspired by and reuses code from:
+- vit_and_captum.py (Integrated Gradients with Captum)
+- vit_lime_uncertainty.py (LIME explanations and uncertainty)
+- detr_and_interp.py (Grad-CAM for DETR, logging setup)
+'''
 import streamlit as st
+import html
+import numpy as np, torch, matplotlib.pyplot as plt
+from PIL import Image
+from transformers import AutoModelForImageClassification, AutoImageProcessor, PreTrainedModel
+from lime import lime_image
+import torchvision.transforms as T
+import timm
+from skimage.segmentation import slic, mark_boundaries
+import streamlit.components.v1 as components
+# Add logging
+import logging, os
+from logging.handlers import RotatingFileHandler
+LOG_DIR = os.path.join(os.path.dirname(__file__), "logs")
+os.makedirs(LOG_DIR, exist_ok=True)
+logfile = os.path.join(LOG_DIR, "interp.log")
+logger = logging.getLogger("interp")
+if not logger.handlers:
+    logger.setLevel(logging.INFO)
+    sh = logging.StreamHandler()
+    sh.setLevel(logging.INFO)
+    fh = RotatingFileHandler(logfile, maxBytes=5_000_000, backupCount=3, encoding="utf-8")
+    fh.setLevel(logging.INFO)
+    fmt = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
+    sh.setFormatter(fmt)
+    fh.setFormatter(fmt)
+    logger.addHandler(sh)
+    logger.addHandler(fh)
+# ---------------- Setup ----------------
+MODEL_NAME = "google/vit-base-patch16-224"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# ---------- Sidebar model selectors ----------
+# Quick lists you can edit to test other HF / timm models
+HF_MODELS = [
+    "google/vit-base-patch16-224",
+    "facebook/deit-base-patch16-224",
+    "microsoft/swin-tiny-patch4-window7-224",
+    "google/vit-large-patch16-224",
+]
+TIMM_MODELS = [
+    "convnext_base",
+    "resnet50",
+    "efficientnet_b0",
+]
+def model_selector(slot_key: str, default_source="hf"):
+    source = st.sidebar.selectbox(
+        f"{slot_key} source",
+        ["hf", "timm"],
+        index=0 if default_source == "hf" else 1,
+        key=f"{slot_key}_source",
+    )
+    if source == "hf":
+        hf_choice = st.sidebar.selectbox(
+            f"{slot_key} Hugging Face model",
+            HF_MODELS,
+            index=0,
+            key=f"{slot_key}_hf",
+        )
+        return f"hf:{hf_choice}"
+    else:
+        timm_choice = st.sidebar.selectbox(
+            f"{slot_key} timm model",
+            TIMM_MODELS,
+            index=0,
+            key=f"{slot_key}_timm",
+        )
+        return f"timm:{timm_choice}"
+# ---------- Model Loader ----------
+# Use Streamlit caching when available to avoid repeated downloads
+try:
+    cache_decorator = st.cache_resource
+except Exception:
+    from functools import lru_cache
+    cache_decorator = lru_cache(maxsize=8)
+@cache_decorator
+def load_model(choice, checkpoint=None, class_map=None, num_classes=None):
+    """
+    Load a model from HF, timm, or a custom checkpoint
+    Args:
+        choice: Model identifier ('hf:model_name' or 'timm:model_name')
+        checkpoint: Optional path to custom checkpoint file
+        class_map: Optional dict mapping class indices to labels
+        num_classes: Optional number of classes for custom models
+    """
+    logger.info("Loading model: %s", choice)
+    is_hf = choice.startswith("hf:")
+    # Parse model identifier
+    if is_hf:
+        hf_name = choice.split("hf:")[1]
+        if checkpoint:  # Custom checkpoint
+            # For custom HF model, first load the architecture then apply weights
+            try:
+                if num_classes:
+                    model = AutoModelForImageClassification.from_pretrained(
+                        hf_name, num_labels=num_classes, ignore_mismatched_sizes=True
+                    ).to(device)
+                else:
+                    model = AutoModelForImageClassification.from_pretrained(hf_name).to(device)
+                # Load checkpoint with error handling
+                state_dict = torch.load(checkpoint, map_location=device)
+                # If state_dict is wrapped (common in training checkpoints)
+                if "model" in state_dict:
+                    state_dict = state_dict["model"]
+                elif "state_dict" in state_dict:
+                    state_dict = state_dict["state_dict"]
+                # Handle any prefix differences by checking and stripping if needed
+                if all(k.startswith('model.') for k in state_dict if k != 'config'):
+                    state_dict = {k[6:]: v for k, v in state_dict.items() if k != 'config'}
+                # Load with flexible partial loading (ignore missing/unexpected)
+                model.load_state_dict(state_dict, strict=False)
+                logger.info("Custom checkpoint loaded for HF model")
+                # If custom class mapping provided, update config
+                if class_map:
+                    model.config.id2label = class_map
+                    model.config.label2id = {v: int(k) for k, v in class_map.items()}
+            except Exception as e:
+                logger.error(f"Error loading custom HF model: {e}")
+                st.error(f"Failed to load custom model: {e}")
+                # Fallback to base model
+                model = AutoModelForImageClassification.from_pretrained(hf_name).to(device)
+        else:
+            # Standard HF model
+            model = AutoModelForImageClassification.from_pretrained(hf_name).to(device)
+        processor = AutoImageProcessor.from_pretrained(hf_name)
+    elif choice.startswith("timm:"):
+        name = choice.split("timm:")[1]
+        if checkpoint:  # Custom checkpoint
+            try:
+                # For timm, specify custom number of classes if provided
+                if num_classes:
+                    model = timm.create_model(name, pretrained=False, num_classes=num_classes).to(device)
+                else:
+                    model = timm.create_model(name, pretrained=True).to(device)
+                # Load checkpoint
+                state_dict = torch.load(checkpoint, map_location=device)
+                # Handle common checkpoint formats
+                if "model" in state_dict:
+                    state_dict = state_dict["model"]
+                elif "state_dict" in state_dict:
+                    state_dict = state_dict["state_dict"]
+                # Handle any prefix differences
+                if all(k.startswith('module.') for k in state_dict):
+                    state_dict = {k[7:]: v for k, v in state_dict}
+                model.load_state_dict(state_dict, strict=False)
+                logger.info("Custom checkpoint loaded for timm model")
+            except Exception as e:
+                logger.error(f"Error loading custom timm model: {e}")
+                st.error(f"Failed to load custom model: {e}")
+                # Fallback to pretrained
+                model = timm.create_model(name, pretrained=True).to(device)
+        else:
+            # Standard timm model
+            model = timm.create_model(name, pretrained=True).to(device)
+        # Use a standard processor for timm
+        processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")
+    # Set model to eval mode
+    model.eval()
+    logger.info("Model %s loaded (eval mode)", choice)
+    # Return model, processor, flag for HF, and class map
+    return model, processor, is_hf, class_map
+# Add sidebar with clear sections
+st.sidebar.title("Model Selection")
+# Enhanced sidebar with custom model support
+with st.sidebar:
+    # Add tabs for standard vs custom models
+    tab1, tab2 = st.tabs(["Standard Models", "Custom Finetuned Models"])
+    with tab1:
+        st.markdown("### 📊 Standard Models")
+        st.markdown("Choose from pre-trained models:")
+        m1 = model_selector("Active Model", default_source="hf")
+        # Button to apply standard model change
+        if st.button("📋 Set as Active Model", help="Click to use the selected model for analysis", key="std_model_btn"):
+            with st.spinner(f"Loading {m1}..."):
+                model, processor, is_hf_model, _ = load_model(m1)
+                st.session_state.model = model
+                st.session_state.processor = processor
+                st.session_state.is_hf_model = is_hf_model
+                st.session_state.active_model = m1
+                st.session_state.using_custom = False
+                st.session_state.class_map = None
+                st.success(f"✅ Model activated: {m1}")
+    with tab2:
+        st.markdown("### 🔧 Custom Finetuned Model")
+        st.markdown("Use your own finetuned model:")
+        # Select base architecture
+        custom_source = st.selectbox(
+            "Base architecture source",
+            ["hf", "timm"],
+            key="custom_source"
+        )
+        if custom_source == "hf":
+            custom_base = st.selectbox(
+                "Hugging Face base model",
+                HF_MODELS,
+                key="custom_hf_base"
+            )
+            base_model = f"hf:{custom_base}"
+        else:
+            custom_base = st.selectbox(
+                "timm base model",
+                TIMM_MODELS,
+                key="custom_timm_base"
+            )
+            base_model = f"timm:{custom_base}"
+        # Upload checkpoint file
+        uploaded_checkpoint = st.file_uploader(
+            "Upload model checkpoint (.pth, .bin)",
+            type=["pth", "bin", "pt", "ckpt"],
+            help="Upload your finetuned model weights"
+        )
+        # Optional class mapping
+        custom_classes = st.number_input(
+            "Number of classes (if different from base model)",
+            min_value=0, max_value=1000, value=0,
+            help="Leave at 0 to use default classes from base model"
+        )
+        uploaded_labels = st.file_uploader(
+            "Upload class labels (optional JSON)",
+            type=["json"],
+            help="JSON file mapping class indices to labels: {\"0\": \"cat\", \"1\": \"dog\"}"
+        )
+        # Process label mapping
+        class_map = None
+        if uploaded_labels:
+            try:
+                import json
+                class_map = json.loads(uploaded_labels.getvalue().decode("utf-8"))
+                st.success(f"✓ Loaded {len(class_map)} class labels")
+            except Exception as e:
+                st.error(f"Error loading class labels: {e}")
+        # Store uploaded file in session state if provided
+        if uploaded_checkpoint:
+            # Save to a temporary file
+            import tempfile
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.pth') as tmp_file:
+                tmp_file.write(uploaded_checkpoint.getvalue())
+                checkpoint_path = tmp_file.name
+                # Store in session state
+                if 'checkpoint_path' not in st.session_state:
+                    st.session_state.checkpoint_path = checkpoint_path
+            st.success("✓ Checkpoint ready to use")
+            # Button to apply custom model
+            if st.button("🚀 Load Custom Model", help="Click to use your custom model"):
+                with st.spinner(f"Loading custom model based on {base_model}..."):
+                    try:
+                        num_classes = custom_classes if custom_classes > 0 else None
+                        model, processor, is_hf_model, class_map = load_model(
+                            base_model, checkpoint_path, class_map, num_classes
+                        )
+                        st.session_state.model = model
+                        st.session_state.processor = processor
+                        st.session_state.is_hf_model = is_hf_model
+                        st.session_state.active_model = f"Custom {base_model}"
+                        st.session_state.using_custom = True
+                        st.session_state.class_map = class_map
+                        st.success(f"✅ Custom model activated!")
+                    except Exception as e:
+                        st.error(f"Failed to load custom model: {str(e)}")
+    # Explanation section
+    st.markdown("---")
+    st.markdown("### ℹ️ Model Types")
+    st.markdown("""
+    - **HF (Hugging Face)**: Vision Transformer models with standard interpretability
+    - **timm (PyTorch Image Models)**: Classical CNN architectures like ResNet, EfficientNet
+    *Custom models must match the base architecture's format.*
+    """)
+# Initialize model and processor from session state
+if 'active_model' not in st.session_state:
+    # First time loading - use default model
+    m1 = "hf:google/vit-base-patch16-224"
+    st.session_state.active_model = m1
+    model, processor, is_hf_model, _ = load_model(m1)
+    st.session_state.model = model
+    st.session_state.processor = processor
+    st.session_state.is_hf_model = is_hf_model
+    st.session_state.using_custom = False
+    st.session_state.class_map = None
+else:
+    # Get from session state
+    model = st.session_state.model
+    processor = st.session_state.processor
+    is_hf_model = st.session_state.is_hf_model
+# Initialize explainer
+explainer = lime_image.LimeImageExplainer()
+st.title("🧠 Vision Transformer Interpretability Dashboard")
+st.write("Upload an image and explore explanations with **LIME** and **Uncertainty Analysis**.")
+# Add a Feynman-style "How it works" explanation as a collapsible expander
+with st.expander("How it works — Feynman-style explanations (click to expand)", expanded=False):
+    st.markdown("""
+        ## 🧠 Vision Transformer Interpretability — Feynman-Style Explanations
+        ### Why do we care about interpretability & uncertainty?
+        Imagine you ask a kid to identify whether a picture is a cat. They point to the fur, ears, maybe whiskers. But what if the kid always focused on shadows, or background trees, instead of the cat itself? We want two things:
+        1. **Why** did the model say “cat”? What parts of the image made it decide so?
+        2. **How confident** is the model in that decision? Could small changes flip it?
+        Interpretable methods show us #1. Uncertainty estimation shows us #2. Together, they help us see not just *what* the model does, but *whether* we should trust it.
+        ### Key techniques, in plain analogies
+        - **LIME (Local Interpretable Model-agnostic Explanations)**: For a single image & prediction, LIME perturbs (changes) parts of the image, watches how the prediction changes, and fits a simple model locally to understand which parts are most influential.
+            - Analogy: Like shining small spotlights on different parts of a stage during a play: you dim a section, see how the actor’s reaction changes. The parts whose dimming changes the reaction most are parts the actor depends on.
+        - **Uncertainty in LIME (multiple LIME runs)**: Because LIME uses randomness (perturbing patches), different runs can give different “important” regions. Measuring how much they differ tells you how stable/fragile the explanation is.
+            - Analogy: If you ask several cooks what the dominant spice in a stew is and everyone agrees, you're confident; if opinions vary, your knowledge is shakier.
+        - **MC Dropout (Monte Carlo Dropout)**: Leave dropout on at inference time and run the model multiple times. The spread of predictions is a proxy for epistemic uncertainty.
+            - Analogy: Like a jury where each juror occasionally misses a sentence; if the verdict remains the same across many "faulty hearing" runs, trust it more.
+        - **Test-Time Augmentation (TTA) Uncertainty**: Apply small transforms (crops, flips) at inference and watch prediction variance. High variance → brittle model.
+            - Analogy: Take photos under slightly different lighting/angles; if the label flips, the model may depend on superficial cues.
+        ### How to read the visuals
+        - LIME highlights: bright / colored superpixels = influential regions. If background or artifacts light up, that's a red flag.
+        - LIME uncertainty heatmap: high std in a region means attributions are unstable there.
+        - MC Dropout / TTA histograms: narrow/tall peak = confident, wide/multi-modal = uncertain.
+        ### Limitations & caveats
+        - Stable explanations can still be consistently wrong if the model learned a bias.
+        - MC Dropout is an approximation — it helps but doesn't fully replace calibrated probabilistic methods.
+        - TTA shows input sensitivity, not full distributional shift robustness.
+        ### Quick example (walkthrough)
+        1. Upload image → model predicts label with some probability.
+        2. LIME finds important superpixels; multiple LIME runs give mean + std maps.
+        3. MC Dropout produces a histogram over runs; use it to judge epistemic uncertainty.
+        4. TTA shows sensitivity to small input changes.
+        ### Practical tips
+        - Use explanation + uncertainty to guide active learning: label cases where the model is uncertain or explanations are unstable.
+        - For safety-critical systems, combine these visual signals with human review and stricter failure thresholds.
+        ### Where to read more
+        - Christoph Molnar — Interpretable Machine Learning (chapter on LIME): https://christophm.github.io/interpretable-ml-book/lime.html
+        - Ribeiro et al., "Why Should I Trust You?" (original LIME paper): https://homes.cs.washington.edu/~marcotcr/blog/lime/
+        - Zhang et al., "Why Should You Trust My Explanation?" (LIME reliability): https://arxiv.org/abs/1904.12991
+        - MC Dropout practical guide & notes: https://medium.com/@ciaranbench/monte-carlo-dropout-a-practical-guide-4b4dc18014b5
+        """)
+# Compact one-page cheat-sheet (quick flags & checks)
+with st.expander("Cheat-sheet — Quick flags & warnings", expanded=False):
+        cheat_text = """
+Quick checks when an explanation looks suspicious
+- Red flag: LIME highlights background or repeated dataset artifacts (logos, borders) — model may have learned spurious cues.
+- Red flag: LIME attribution std is high in key regions — explanation unstable; try different segmentations or more samples.
+- Red flag: MC Dropout or TTA histograms are multi-modal or very wide — model uncertain; consider human review or abstain.
+- Quick fixes: increase dataset diversity, add regularization, try different segmentation_fn parameters, or collect more labels for uncertain cases.
+One-line definitions
+- LIME: perturb + fit simple local model to explain a single prediction.
+- MC Dropout: enable dropout at inference and sample to estimate epistemic uncertainty.
+- TTA: apply small input transforms at inference to measure sensitivity / aleatoric uncertainty.
+Pro-tip: Use explanation + uncertainty to drive active learning: pick instances with high prediction uncertainty or unstable explanations for labeling.
 """
+        # Show the cheat-sheet as markdown
+        st.markdown(cheat_text)
+        # Download button for the cheat-sheet as plain text
+        try:
+                st.download_button(
+                        label="Download cheat-sheet (.txt)",
+                        data=cheat_text,
+                        file_name="cheat_sheet.txt",
+                        mime="text/plain",
+                )
+        except Exception:
+                # Streamlit may raise if download_button isn't available in some environments; ignore gracefully
+                pass
+        # Copy-to-clipboard button using a small HTML+JS snippet
+        escaped = html.escape(cheat_text)
+        copy_html = f"""
+        <div>
+            <button id='copy-btn' style='padding:6px 10px;border-radius:4px;'>Copy cheat-sheet</button>
+            <script>
+                const btn = document.getElementById('copy-btn');
+                btn.addEventListener('click', async () => {{
+                    try {{
+                        await navigator.clipboard.writeText(`{escaped}`);
+                        btn.innerText = 'Copied!';
+                        setTimeout(() => btn.innerText = 'Copy cheat-sheet', 1500);
+                    }} catch (e) {{
+                        btn.innerText = 'Copy failed';
+                    }}
+                }});
+            </script>
+        </div>
+        """
+        components.html(copy_html, height=70)
+# Display active model clearly in the main panel
+is_custom = st.session_state.get('using_custom', False)
+custom_badge = " 🔧 Custom" if is_custom else ""
+st.markdown(f"### Active Model: `{st.session_state.active_model}{custom_badge}`")
+model_type = "Hugging Face Transformer" if is_hf_model else "timm CNN Architecture"
+st.caption(f"Model type: {model_type}")
+# ---------------- Helpers ----------------
+def classifier_fn(images_batch):
+    # Use current model/processor from session state
+    inputs = processor(images=[Image.fromarray(x.astype(np.uint8)) for x in images_batch],
+                      return_tensors="pt").to(device)
+    with torch.no_grad():
+        if is_hf_model:
+            outputs = model(**inputs)
+            logits = outputs.logits
+        else:
+            x = inputs['pixel_values']
+            logits = model(x)
+        probs = torch.softmax(logits, dim=-1).cpu().numpy()
+    return probs
+def predict_probs(pil_img):
+    # Use current model/processor from session state
+    inputs = processor(images=pil_img, return_tensors="pt").to(device)
+    with torch.no_grad():
+        if is_hf_model:
+            outputs = model(**inputs)
+            logits = outputs.logits
+        else:
+            x = inputs['pixel_values']
+            logits = model(x)
+        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
+    return probs
+# ---------------- Upload ----------------
+uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"])
+if uploaded:
+    img = Image.open(uploaded).convert("RGB").resize((224,224))
+    logger.info("Uploaded image received (size=%s)", img.size)
+    st.image(img, caption="Uploaded image", use_container_width=True)
+    # ---------------- Prediction ----------------
+    probs = predict_probs(img)
+    pred_idx = int(np.argmax(probs))
+    # Get label - handle models differently based on source
+    if is_hf_model:
+        # Use model's config.id2label if available
+        pred_label = model.config.id2label[pred_idx]
+    elif st.session_state.get('class_map'):
+        # Use custom class map if provided (access defensively)
+        _class_map = st.session_state.get('class_map')
+        pred_label = _class_map.get(str(pred_idx), f"Class {pred_idx}") if _class_map is not None else f"Class {pred_idx}"
+    else:
+        # For timm models without labels
+        pred_label = f"Class {pred_idx}"
+    pred_prob = float(probs[pred_idx])
+    logger.info("Prediction: %s (%.3f)", pred_label, pred_prob)
+    st.subheader("🔮 Prediction")
+    st.write(f"**Top-1:** {pred_label} ({pred_prob:.3f})")
+    if not is_hf_model and not st.session_state.get('class_map'):
+        st.info("ℹ️ Using model without class names. Upload a class mapping in the sidebar for friendly labels.")
+    # ---------------- LIME ----------------
+    st.subheader("📍 LIME Attribution")
+    st.markdown("""
+    **Local Interpretable Model-agnostic Explanations (LIME)** is a technique that approximates how a complex model (like ViT or ResNet) makes decisions for a specific input by creating a simpler, interpretable model around it.
+    It perturbs the image into segments and sees which ones most influence the prediction, revealing what the model "sees" as important.
+    This is crucial for debugging biases or understanding if the model focuses on relevant features vs. artifacts.
+    """)
+    img_np = np.array(img)
+    with st.spinner("Generating LIME explanation..."):
+        exp = explainer.explain_instance(
+            img_np, classifier_fn=classifier_fn, top_labels=1, num_samples=1000,
+            segmentation_fn=lambda x: slic(x, n_segments=60, compactness=9, start_label=0)
+        )
+        temp, mask = exp.get_image_and_mask(pred_idx, positive_only=True,
+                                            num_features=8, hide_rest=False)
+        lime_img = mark_boundaries(temp/255.0, mask)
+    st.image(lime_img, caption=f"LIME highlights regions important for '{pred_label}'")
+    st.info("""
+    **How to read:** Bright (or colored) segments show areas the model relied on most for its prediction – these are the "superpixels" that, when altered, change the output the most.
+    Green/red overlays often indicate positive/negative contributions. If irrelevant background or edges light up, it might signal the model learned spurious correlations (e.g., from training data artifacts).
+    Furthermore, this builds trust by showing if AI decisions align with human intuition.
+    """)
+    # ---------------- LIME Uncertainty ----------------
+    st.subheader("📊 LIME Attribution Uncertainty")
+    st.markdown("""
+    Uncertainty in explanations arises because LIME is stochastic – it samples perturbations randomly. By running LIME multiple times, we can measure variability in attributions,
+    highlighting if the model's reasoning is consistent or fragile for this image. High variability suggests the explanation (and thus model confidence) isn't robust.
+    """)
+    logger.info("Starting LIME uncertainty runs (n=5)")
+    maps = []
+    for i in range(5):
+        logger.debug("LIME run %d", i+1)
+        exp = explainer.explain_instance(
+            img_np, classifier_fn=classifier_fn, top_labels=1, num_samples=500,
+            segmentation_fn=lambda x: slic(x, n_segments=60, compactness=9, start_label=0)
+        )
+        local_exp = dict(exp.local_exp)[pred_idx]
+        segments = exp.segments
+        attr_map = np.zeros(segments.shape)
+        for seg_id, weight in local_exp:
+            attr_map[segments == seg_id] = weight
+        maps.append(attr_map)
+    maps = np.stack(maps)
+    mean_attr, std_attr = maps.mean(0), maps.std(0)
+    fig, ax = plt.subplots(1,2, figsize=(8,4))
+    im1 = ax[0].imshow(mean_attr, cmap="jet"); ax[0].set_title("Mean attribution"); ax[0].axis("off")
+    plt.colorbar(im1, ax=ax[0], fraction=0.046)
+    im2 = ax[1].imshow(std_attr, cmap="hot"); ax[1].set_title("Attribution std (uncertainty)"); ax[1].axis("off")
+    plt.colorbar(im2, ax=ax[1], fraction=0.046)
+    st.pyplot(fig)
+    st.info("""
+    **How to read:** The left heatmap shows average importance across runs (hotter = more influential). The right shows standard deviation – high std (yellow/red) means unstable explanations for those regions.
+    If uncertainty is high in key areas, the model might overfit or need more diverse training data. This helps ML practitioners quantify explanation reliability.
+    """)
+    logger.info("Completed LIME uncertainty runs")
+    # ---------------- MC Dropout ----------------
+    st.subheader("🎲 MC Dropout Uncertainty")
+    st.markdown("""
+    Monte Carlo (MC) Dropout treats dropout layers (normally off during inference) as a Bayesian approximation to estimate epistemic uncertainty – how much the model "doesn't know" due to limited training.
+    By enabling dropout and sampling predictions multiple times, we see if the model consistently agrees on the class or wavers, indicating potential unreliability.
+    """)
+    logger.info("Starting MC Dropout sampling")
+    model.train()  # enable dropout
+    mc_preds = []
+    with torch.no_grad():
+        for _ in range(30):
+            probs_mc = predict_probs(img)
+            mc_preds.append(probs_mc)
+    model.eval()
+    mc_preds = np.stack(mc_preds)
+    mc_mean = mc_preds.mean(0)
+    mc_top = mc_mean.argmax()
+    if is_hf_model:
+        mc_label = model.config.id2label[mc_top]
+    elif st.session_state.get('class_map'):
+        _class_map = st.session_state.get('class_map')
+        mc_label = _class_map.get(str(mc_top), f"Class {mc_top}") if _class_map is not None else f"Class {mc_top}"
+    else:
+        mc_label = f"Class {mc_top}"
+    p = mc_preds[:, mc_top]
+    fig, ax = plt.subplots()
+    ax.hist(p, bins=15, color="C0")
+    ax.set_title(f"MC Dropout: p({mc_label}) across samples")
+    st.pyplot(fig)
+    st.info("""
+    **How to read:** This histogram shows probability distributions for the top class across 30 samples. A narrow, peaked distribution means stable confidence (low uncertainty).
+    A wide spread or multiple modes suggests the model is unsure, possibly due to out-of-distribution inputs. For devs, this flags cases needing human review; it highlights risky predictions.
+    """)
+    logger.info("Completed MC Dropout: top=%s", mc_label)
+    # ---------------- Test-Time Augmentation (TTA) Uncertainty ----------------
+    st.subheader("🔄 Test-Time Augmentation (TTA) Uncertainty")
+    st.markdown("""
+    Test-Time Augmentation (TTA) applies random transformations (crops, flips) at inference to probe aleatoric uncertainty – noise inherent in the input or model.
+    If predictions vary wildly under small changes, the model relies on brittle features, revealing data-related issues rather than model knowledge gaps.
+    """)
+    logger.info("Starting TTA sampling")
+    tta_tfms = T.Compose([T.Resize(256), T.RandomResizedCrop(224, scale=(0.9,1.0)), T.RandomHorizontalFlip(p=0.5)])
+    tta_preds = []
+    with torch.no_grad():
+        for _ in range(20):
+            aug = tta_tfms(img)
+            probs_tta = predict_probs(aug)
+            tta_preds.append(probs_tta)
+    tta_preds = np.stack(tta_preds)
+    tta_mean = tta_preds.mean(0)
+    tta_top = tta_mean.argmax()
+    if is_hf_model:
+        tta_label = model.config.id2label[tta_top]
+    elif st.session_state.get('class_map'):
+        _class_map = st.session_state.get('class_map')
+        tta_label = _class_map.get(str(tta_top), f"Class {tta_top}") if _class_map is not None else f"Class {tta_top}"
+    else:
+        tta_label = f"Class {tta_top}"
+    p_tta = tta_preds[:, tta_top]
+    fig, ax = plt.subplots()
+    ax.hist(p_tta, bins=15, color="C1")
+    ax.set_title(f"TTA: p({tta_label}) across augmentations")
+    st.pyplot(fig)
+    st.info("""
+    **How to read:** Similar to MC Dropout, but focused on input variations. Low variance means the prediction is robust to perturbations (good sign). High variance indicates sensitivity to details like lighting/position,
+    common in overfitted models. Use this to assess if your AI system handles real-world variability well.
+    """)
+    logger.info("Completed TTA: top=%s", tta_label)
+# ---------------- Summary ----------------