Abdullah
committed on
Uploading saes for ['transformer.h.0.attn', 'transformer.h.0.mlp', 'transformer.h.1.attn', 'transformer.h.1.mlp'] and sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32
Browse files
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/config.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/model_config.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/state.pt +3 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.mlp/sae.safetensors +3 -0
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81ec4a68d15406a4705b860cc8ccb0cda3b0bf3d809162715b62225325b749eb
|
3 |
+
size 1483
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/model_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2", "dataset_name": "withmartian/cs5_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True"}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc32ddd3dd7d6b9c0c7460ff20f68fb34ef6492b11e35451dbd11ded4d5a8797
|
3 |
+
size 1007171903
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b6f5d39b1676e4ec5ee3ef0a20eb93e1d627aafd8180afb79e78588c8df3098
|
3 |
+
size 493769
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.45787763595581055, "fvu": 0.027461709454655647}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:900f9043af69c331c7623b4808e154fae2eecff89669b2a1c71022150bc70e4b
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.09622396528720856, "fvu": 0.04256979376077652}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.0.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c3559130941713cdc0846e35a78ee0ee9ea8282ed6ff2cdd4fc9d9e89fdd0ac
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.47031253576278687, "fvu": 0.04730036109685898}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e5a1803464b5de9526b4b03e32ddd2bbf138624d5c735a3bf4df0ac31b87842
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.14121094346046448, "fvu": 0.06863211840391159}
|
saes_sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2_syn=True/k=32/transformer.h.1.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ca440fc9379a5cf84c84d02610645d846a8f81ccf65f6993a053c683924f4cb
|
3 |
+
size 125894984
|