Spaces:
Sleeping
Sleeping
Update app with improved UI and markdown formatting
Browse files- .gitignore +53 -1
- .python-version +1 -0
- README.md +1 -1
- __pycache__/ddpg.cpython-310.pyc +0 -0
- app.py +183 -53
- pyproject.toml +23 -0
- requirements_hf.txt +13 -0
- uv.lock +0 -0
.gitignore
CHANGED
|
@@ -1 +1,53 @@
|
|
| 1 |
-
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
| 2 |
+
|
| 3 |
+
# Python
|
| 4 |
+
__pycache__/
|
| 5 |
+
*.py[cod]
|
| 6 |
+
*$py.class
|
| 7 |
+
*.so
|
| 8 |
+
.Python
|
| 9 |
+
build/
|
| 10 |
+
develop-eggs/
|
| 11 |
+
dist/
|
| 12 |
+
downloads/
|
| 13 |
+
eggs/
|
| 14 |
+
.eggs/
|
| 15 |
+
lib/
|
| 16 |
+
lib64/
|
| 17 |
+
parts/
|
| 18 |
+
sdist/
|
| 19 |
+
var/
|
| 20 |
+
wheels/
|
| 21 |
+
*.egg-info/
|
| 22 |
+
.installed.cfg
|
| 23 |
+
*.egg
|
| 24 |
+
|
| 25 |
+
# Virtual environments
|
| 26 |
+
.venv/
|
| 27 |
+
venv/
|
| 28 |
+
ENV/
|
| 29 |
+
env/
|
| 30 |
+
|
| 31 |
+
# IDE
|
| 32 |
+
.vscode/
|
| 33 |
+
.idea/
|
| 34 |
+
*.swp
|
| 35 |
+
*.swo
|
| 36 |
+
|
| 37 |
+
# Jupyter Notebook
|
| 38 |
+
.ipynb_checkpoints
|
| 39 |
+
|
| 40 |
+
# Model files (if large)
|
| 41 |
+
*.pth
|
| 42 |
+
*.pt
|
| 43 |
+
*.h5
|
| 44 |
+
*.pkl
|
| 45 |
+
*.joblib
|
| 46 |
+
|
| 47 |
+
# Logs
|
| 48 |
+
*.log
|
| 49 |
+
logs/
|
| 50 |
+
|
| 51 |
+
# Temporary files
|
| 52 |
+
tmp/
|
| 53 |
+
temp/
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.10
|
README.md
CHANGED
|
@@ -54,4 +54,4 @@ For each step, the reward:
|
|
| 54 |
|
| 55 |
## `train()` and `load_trained()`
|
| 56 |
|
| 57 |
-
`load_trained()` function loads a pre-trained model that ran through 1000 episodes of training, while `train()` does training from scratch. You can edit which one of the functions is running from the bottom of the main.py file. If you set render_mode=False, the program will train a lot faster.
|
|
|
|
| 54 |
|
| 55 |
## `train()` and `load_trained()`
|
| 56 |
|
| 57 |
+
The `load_trained()` function loads a pre-trained model that was trained for 1000 episodes, while `train()` trains a model from scratch. To choose which of the two runs, edit the call at the bottom of the main.py file. Setting render_mode=False makes training much faster.
|
__pycache__/ddpg.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/ddpg.cpython-310.pyc and b/__pycache__/ddpg.cpython-310.pyc differ
|
|
|
app.py
CHANGED
|
@@ -3,14 +3,14 @@ from train import TrainingLoop
|
|
| 3 |
from scipy.special import softmax
|
| 4 |
import numpy as np
|
| 5 |
|
| 6 |
-
|
| 7 |
train = None
|
| 8 |
-
|
| 9 |
frames, attributions = None, None
|
| 10 |
|
| 11 |
-
|
|
|
|
| 12 |
0: "X-coordinate",
|
| 13 |
-
1: "Y-coordinate",
|
| 14 |
2: "Linear velocity in the X-axis",
|
| 15 |
3: "Linear velocity in the Y-axis",
|
| 16 |
4: "Angle",
|
|
@@ -20,76 +20,206 @@ lunar_lander_spec_conversion = {
|
|
| 20 |
}
|
| 21 |
|
| 22 |
def create_training_loop(env_spec):
|
|
|
|
| 23 |
global train
|
| 24 |
train = TrainingLoop(env_spec=env_spec)
|
| 25 |
train.create_agent()
|
| 26 |
-
|
| 27 |
return train.env.spec
|
| 28 |
|
| 29 |
def display_softmax(inputs):
|
|
|
|
| 30 |
inputs = np.array(inputs)
|
| 31 |
probabilities = softmax(inputs)
|
| 32 |
-
|
| 33 |
-
softmax_dict = {
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
return softmax_dict
|
| 36 |
|
| 37 |
def generate_output(num_iterations, option):
|
|
|
|
| 38 |
global frames, attributions
|
| 39 |
-
frames, attributions = train.explain_trained(
|
|
|
|
|
|
|
|
|
|
| 40 |
slider.maximum = len(frames)
|
| 41 |
|
| 42 |
def get_frame_and_attribution(slider_value):
|
|
|
|
| 43 |
global frames, attributions
|
| 44 |
slider_value = min(slider_value, len(frames) - 1)
|
| 45 |
frame = frames[slider_value]
|
| 46 |
-
|
| 47 |
-
print(f"{frame.shape
|
| 48 |
-
|
| 49 |
attribution = display_softmax(attributions[slider_value])
|
| 50 |
-
|
| 51 |
return frame, attribution
|
| 52 |
|
| 53 |
-
with gr.Blocks(
|
| 54 |
-
|
| 55 |
-
gr.
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
""")
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
with gr.Row():
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
|
|
|
|
|
|
| 3 |
from scipy.special import softmax
|
| 4 |
import numpy as np
|
| 5 |
|
| 6 |
+
# Global variables for training and data storage
|
| 7 |
train = None
|
|
|
|
| 8 |
frames, attributions = None, None
|
| 9 |
|
| 10 |
+
# Lunar Lander environment state feature mapping
|
| 11 |
+
LUNAR_LANDER_FEATURES = {
|
| 12 |
0: "X-coordinate",
|
| 13 |
+
1: "Y-coordinate",
|
| 14 |
2: "Linear velocity in the X-axis",
|
| 15 |
3: "Linear velocity in the Y-axis",
|
| 16 |
4: "Angle",
|
|
|
|
| 20 |
}
|
| 21 |
|
| 22 |
def create_training_loop(env_spec):
    """Create the training loop for ``env_spec`` and return the environment spec.

    The new ``TrainingLoop`` is stored in the module-level ``train`` variable
    so the other handlers in this module can reach it, then its agent is
    created.

    Args:
        env_spec: Environment identifier forwarded to ``TrainingLoop``.

    Returns:
        The ``spec`` attribute of the training loop's environment.
    """
    global train
    loop = TrainingLoop(env_spec=env_spec)
    # Publish the loop before creating the agent, matching the original order
    # of side effects.
    train = loop
    loop.create_agent()
    return loop.env.spec
|
| 28 |
|
| 29 |
def display_softmax(inputs):
    """Map each Lunar Lander feature name to its softmax-normalized attribution.

    Args:
        inputs: Raw attribution values; converted with ``np.array`` before
            normalization.

    Returns:
        A dict keyed by the values of ``LUNAR_LANDER_FEATURES`` whose entries
        are the corresponding softmax outputs as Python floats. Pairing is
        positional and stops at the shorter of the two sequences.
    """
    scores = softmax(np.array(inputs))

    labelled = {}
    for feature_name, score in zip(LUNAR_LANDER_FEATURES.values(), scores):
        labelled[feature_name] = float(score)
    return labelled
|
| 39 |
|
| 40 |
def generate_output(num_iterations, option):
    """Run the attribution pass on the trained agent and cache its results.

    The frames and attributions returned by ``train.explain_trained`` are
    stored in the module-level ``frames`` and ``attributions`` variables.

    Args:
        num_iterations: Forwarded to ``explain_trained`` as ``num_iterations``.
        option: Forwarded to ``explain_trained`` as ``option``.

    Returns:
        None.
    """
    global frames, attributions
    explanation = train.explain_trained(
        num_iterations=num_iterations,
        option=option,
    )
    frames, attributions = explanation
    # NOTE(review): this only mutates the Python-side component object; it is
    # not visible from here whether the rendered slider picks the new maximum
    # up. Confirm, or return a slider update from this handler instead.
    slider.maximum = len(frames)
|
| 48 |
|
| 49 |
def get_frame_and_attribution(slider_value):
    """Return the frame and feature attributions for the selected timestep.

    Args:
        slider_value: Timestep index chosen on the key-frame slider. Values
            beyond the last recorded frame are clamped to the final frame.

    Returns:
        A ``(frame, attribution)`` pair: the entry of ``frames`` at the
        selected index and the ``display_softmax`` result for the matching
        entry of ``attributions``. ``(None, None)`` is returned when no
        results have been generated yet.
    """
    # ``frames``/``attributions`` start out as None and are only filled by
    # generate_output(); a slider change before that used to raise a
    # TypeError on len(None).
    if frames is None or attributions is None or len(frames) == 0:
        return None, None

    # Coerce to int so the value is always usable as a sequence index, and
    # clamp it to the last available frame.
    index = min(int(slider_value), len(frames) - 1)
    frame = frames[index]

    print(f"Frame shape: {frame.shape}")

    return frame, display_softmax(attributions[index])
|
| 59 |
|
| 60 |
+
with gr.Blocks(
|
| 61 |
+
title="Deep RL Explainability",
|
| 62 |
+
theme=gr.themes.Soft(),
|
| 63 |
+
css="""
|
| 64 |
+
.gradio-container {
|
| 65 |
+
max-width: 1200px !important;
|
| 66 |
+
}
|
| 67 |
+
.tab-nav {
|
| 68 |
+
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
|
| 69 |
+
}
|
| 70 |
+
"""
|
| 71 |
+
) as demo:
|
| 72 |
+
|
| 73 |
+
# Header section
|
| 74 |
+
gr.Markdown("""
|
| 75 |
+
# ๐ Deep Reinforcement Learning Explainability
|
| 76 |
+
|
| 77 |
+
**Exploring AI decision-making through Integrated Gradients in RL environments**
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
""")
|
| 81 |
+
|
| 82 |
+
# Introduction section
|
| 83 |
+
gr.Markdown("""
|
| 84 |
+
## ๐ How This Works
|
| 85 |
+
|
| 86 |
+
This application demonstrates the application of **[Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients)**
|
| 87 |
+
to Deep Reinforcement Learning scenarios. We use PyTorch's Captum library for interpretability
|
| 88 |
+
and Gymnasium for the continuous Lunar Lander environment.
|
| 89 |
+
|
| 90 |
+
### 🧠 Training Algorithm: [DDPG](https://arxiv.org/abs/1509.02971)
|
| 91 |
+
|
| 92 |
+
The agent is trained using **Deep Deterministic Policy Gradients** and achieves an average reward
|
| 93 |
+
of **260.8** per episode (successful landings).
|
| 94 |
+
|
| 95 |
+
### 🎯 How to Use This Space
|
| 96 |
+
|
| 97 |
+
1. **Select Environment**: Choose the Lunar Lander environment
|
| 98 |
+
2. **Choose Baseline**: Select between zero tensor or running average baseline
|
| 99 |
+
3. **Generate Attributions**: Click "ATTRIBUTE" and wait ~20-25 seconds
|
| 100 |
+
4. **Explore Results**: Use the slider to examine attributions at different timesteps
|
| 101 |
+
|
| 102 |
+
The attributions are normalized using Softmax to provide interpretable probability distributions.
|
| 103 |
+
""")
|
| 104 |
+
|
| 105 |
+
# Main interface tab
|
| 106 |
+
with gr.Tab("๐ Attribution Analysis", elem_id="attribution-tab"):
|
| 107 |
+
|
| 108 |
+
# Environment setup
|
| 109 |
+
gr.Markdown("### ๐ Environment Setup")
|
| 110 |
+
env_spec = gr.Dropdown(
|
| 111 |
+
choices=["LunarLander-v2"],
|
| 112 |
+
type="value",
|
| 113 |
+
multiselect=False,
|
| 114 |
+
label="Environment Specification",
|
| 115 |
+
value="LunarLander-v2",
|
| 116 |
+
info="Select the RL environment to analyze"
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
env_interface = gr.Interface(
|
| 120 |
+
title="Initialize Environment",
|
| 121 |
+
allow_flagging="never",
|
| 122 |
+
inputs=env_spec,
|
| 123 |
+
fn=create_training_loop,
|
| 124 |
+
outputs=gr.JSON(label="Environment Spec"),
|
| 125 |
+
description="Click to initialize the training environment"
|
| 126 |
+
)
|
| 127 |
+
|
| 128 |
+
# Attribution controls
|
| 129 |
+
gr.Markdown("### ⚙️ Attribution Configuration")
|
| 130 |
+
|
| 131 |
with gr.Row():
|
| 132 |
+
with gr.Column(scale=1):
|
| 133 |
+
option = gr.Dropdown(
|
| 134 |
+
choices=["Torch Tensor of 0's", "Running Average"],
|
| 135 |
+
type="index",
|
| 136 |
+
label="Baseline Method",
|
| 137 |
+
info="Choose the baseline for Integrated Gradients"
|
| 138 |
+
)
|
| 139 |
+
|
| 140 |
+
with gr.Column(scale=1):
|
| 141 |
+
baselines = gr.Slider(
|
| 142 |
+
label="Number of Baseline Iterations",
|
| 143 |
+
interactive=True,
|
| 144 |
+
minimum=0,
|
| 145 |
+
maximum=100,
|
| 146 |
+
value=10,
|
| 147 |
+
step=5,
|
| 148 |
+
info="Number of baseline inputs to collect for averaging"
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
# Generate button
|
| 152 |
+
generate_btn = gr.Button(
|
| 153 |
+
"๐ GENERATE ATTRIBUTIONS",
|
| 154 |
+
variant="primary",
|
| 155 |
+
size="lg"
|
| 156 |
+
)
|
| 157 |
+
generate_btn.click(
|
| 158 |
+
fn=generate_output,
|
| 159 |
+
inputs=[baselines, option],
|
| 160 |
+
outputs=[]
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
# Results section
|
| 164 |
+
gr.Markdown("### ๐ Results Visualization")
|
| 165 |
+
|
| 166 |
+
slider = gr.Slider(
|
| 167 |
+
label="🎬 Key Frame Selector",
|
| 168 |
+
minimum=0,
|
| 169 |
+
maximum=1000,
|
| 170 |
+
step=1,
|
| 171 |
+
value=0,
|
| 172 |
+
info="Navigate through different timesteps to see attributions"
|
| 173 |
+
)
|
| 174 |
+
|
| 175 |
+
results_interface = gr.Interface(
|
| 176 |
+
fn=get_frame_and_attribution,
|
| 177 |
+
inputs=slider,
|
| 178 |
+
live=True,
|
| 179 |
+
outputs=[
|
| 180 |
+
gr.Image(label="🎮 Environment State", type="numpy"),
|
| 181 |
+
gr.Label(label="๐ Feature Attributions", num_top_classes=8)
|
| 182 |
+
],
|
| 183 |
+
title="Real-time Attribution Analysis"
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
gr.Markdown("""
|
| 187 |
+
---
|
| 188 |
+
|
| 189 |
+
## 🛠️ Local Usage & Installation
|
| 190 |
+
|
| 191 |
+
### Required Packages
|
| 192 |
+
```bash
|
| 193 |
+
pip install torch gymnasium 'gymnasium[box2d]'
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
### Box2D Installation (macOS)
|
| 197 |
+
```bash
|
| 198 |
+
brew install swig
|
| 199 |
+
pip install box2d
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
## 🎯 Lunar Lander Environment Details
|
| 203 |
+
|
| 204 |
+
### Reward Structure
|
| 205 |
+
- **Position**: Increased/decreased based on distance to landing pad
|
| 206 |
+
- **Velocity**: Increased/decreased based on speed (slower is better)
|
| 207 |
+
- **Angle**: Decreased when lander is tilted (horizontal is ideal)
|
| 208 |
+
- **Landing**: +10 points for each leg touching ground
|
| 209 |
+
- **Fuel**: -0.03 points per frame for side engine, -0.3 for main engine
|
| 210 |
+
- **Episode End**: -100 for crash, +100 for safe landing
|
| 211 |
+
|
| 212 |
+
**Success Threshold**: 200+ points per episode
|
| 213 |
+
|
| 214 |
+
### Training Functions
|
| 215 |
+
- `load_trained()`: Loads pre-trained model (1000 episodes)
|
| 216 |
+
- `train()`: Trains from scratch
|
| 217 |
+
- Set `render_mode=False` for faster training
|
| 218 |
+
|
| 219 |
+
---
|
| 220 |
+
|
| 221 |
+
*Built with ❤️ using Gradio, PyTorch, and Captum*
|
| 222 |
+
""")
|
| 223 |
|
| 224 |
+
if __name__ == "__main__":
|
| 225 |
+
demo.launch()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "deep-rl-explainability"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Exploring AI decision-making through Integrated Gradients in RL environments"
|
| 5 |
+
requires-python = ">=3.10"
|
| 6 |
+
dependencies = [
|
| 7 |
+
"ale-py==0.8.1",
|
| 8 |
+
"autorom==0.4.2",
|
| 9 |
+
"autorom-accept-rom-license==0.6.1",
|
| 10 |
+
"captum==0.6.0",
|
| 11 |
+
"gradio>=5.44.1",
|
| 12 |
+
"gymnasium[box2d]==0.29.1",
|
| 13 |
+
"huggingface-hub>=0.34.4",
|
| 14 |
+
"imageio==2.31.5",
|
| 15 |
+
"imageio-ffmpeg==0.4.9",
|
| 16 |
+
"matplotlib==3.8.0",
|
| 17 |
+
"matplotlib-inline==0.1.6",
|
| 18 |
+
"moviepy==1.0.3",
|
| 19 |
+
"mujoco==2.3.7",
|
| 20 |
+
"numpy==1.26.0",
|
| 21 |
+
"scipy>=1.15.3",
|
| 22 |
+
"torch==2.1.0",
|
| 23 |
+
]
|
requirements_hf.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch==2.1.0
|
| 2 |
+
gymnasium==0.29.1
|
| 3 |
+
gymnasium[box2d]
|
| 4 |
+
gradio==5.44.1
|
| 5 |
+
captum==0.6.0
|
| 6 |
+
numpy==1.26.0
|
| 7 |
+
scipy
|
| 8 |
+
matplotlib==3.8.0
|
| 9 |
+
moviepy==1.0.3
|
| 10 |
+
imageio==2.31.5
|
| 11 |
+
imageio-ffmpeg==0.4.9
|
| 12 |
+
box2d-py==2.3.5
|
| 13 |
+
swig==4.*
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|