Upload SAEs from experiment 4.3
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/config.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/model_config.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/state.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/config.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/model_config.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/state.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/config.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/lr_scheduler.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/model_config.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/optimizer.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/state.pt +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.mlp/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.mlp/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.attn/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.attn/metrics.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.attn/sae.safetensors +3 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.mlp/cfg.json +1 -0
- saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.mlp/metrics.json +1 -0
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afafb111606b64199b463308d054f28acf71d5cca5174e9fd9fce0b0ab0a717c
|
3 |
+
size 1076
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/model_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs2_experiment_2.10", "dataset_name": "withmartian/cs2_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs2_experiment_2.10_syn=True"}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7da704ac6e2129487da820bdc9a9115f7090d98c6d111df2c5072620f9558e7
|
3 |
+
size 255767204
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba2943dec258b4b72adf0815e0552de5fe634fb0d527143961d6ad61e507c078
|
3 |
+
size 493312
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.2057291716337204, "fvu": 0.0022642838302999735}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70cba99086b79c82c1469918102f54834ea64a885e127b8c4f0e404dd31f31f8
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.0001302083401242271, "fvu": 0.01662026345729828}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.0.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a060d18c76f80717cbcadd1739bf2ebe3822223a37629b150192c21bd5ba0195
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"fvu": 0.01743653230369091, "dead_pct": 0.23743490874767303}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b7c8c65b22ab6d392199b73e154ffd27b87758182da30543e4ed03a75bd09e4
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.0013020833721384406, "fvu": 0.027486111968755722}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=128/transformer.h.1.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f1bc41a20f300a09c3a6145546c775e564b409a1f115be8fdda8a125bf29543
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a19ec7a7d99a9517726d577d84860d99635fa1571577af8b6d7ba1c019ae8379
|
3 |
+
size 1076
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/model_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs2_experiment_2.10", "dataset_name": "withmartian/cs2_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs2_experiment_2.10_syn=True"}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10de60f0daa1814700b5cc0c40e8fa2b906b2a0f4e9e0468a809fda0d05f1bf4
|
3 |
+
size 68243108
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d613321b12a736d53fc68af5317721a82e546fe2089aca98469f639ecab033f8
|
3 |
+
size 132864
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.660400390625, "fvu": 0.016979176551103592}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43accc6b9f1c89519cc7024a64f0fc6ba6b4755c444cf698891dc289663cd2b2
|
3 |
+
size 33575240
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.2900390625, "fvu": 0.03907718509435654}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.0.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2c225481766b3491172ec156fc071c7b8993bb830073d79c57a445a8f41b22e
|
3 |
+
size 33575240
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.778564453125, "fvu": 0.05981138348579407}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60fc257c06d4c8b6ce12e140d995c06032651e56630bfc79f430ca3b37a012dd
|
3 |
+
size 33575240
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 4, "normalize_decoder": true, "num_latents": 0, "k": 16, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.419921875, "fvu": 0.07065705209970474}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=16/transformer.h.1.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d850eed5a84a0a25eec6c3f8e212319b5578021385b0c3ab9f815357bcb18ae
|
3 |
+
size 33575240
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 16, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["transformer.h.0.attn", "transformer.h.0.mlp", "transformer.h.1.attn", "transformer.h.1.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 100000, "log_to_wandb": true, "run_name": "saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256", "wandb_log_frequency": 1}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afafb111606b64199b463308d054f28acf71d5cca5174e9fd9fce0b0ab0a717c
|
3 |
+
size 1076
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/model_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "withmartian/sql_interp_bm1_cs2_experiment_2.10", "dataset_name": "withmartian/cs2_dataset_synonyms", "model_abbrev": "saes_sql_interp_bm1_cs2_experiment_2.10_syn=True"}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd2cc81079cd17a50237e1762d19abe77309917d73e12122da7d44d550f2bbb9
|
3 |
+
size 255767204
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4925e63d32f98c7d7fbcd1f289d91a58bb78c6f7e90016fc0cd87eab9145e36a
|
3 |
+
size 493312
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dead_pct": 0.20664063096046448, "fvu": 0.0016410077223554254}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bcaa2f42fe77d37044b250f5569c5d855438badb960d0ed06b912d08b37e8c1
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"fvu": 0.012025310657918453, "dead_pct": 6.510417006211355e-05}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.0.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcb44088b0363248a709436443525c29acfb309eac1b98970936a6085436c032
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.attn/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.attn/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"fvu": 0.009096871130168438, "dead_pct": 0.3643229305744171}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.attn/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9af7bb21b582bed761f367c258982d59d2f408b42736ed46c4b29c42456e805a
|
3 |
+
size 125894984
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 15, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 1024}
|
saes_sql_interp_bm1_cs2_experiment_2.10_syn=True/k=256/transformer.h.1.mlp/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"fvu": 0.019307561218738556, "dead_pct": 6.510417006211355e-05}
|