Add files using upload-large-folder tool
Browse files- config.json +1 -0
- layers.0.mlp/cfg.json +1 -0
- layers.0.mlp/sae.safetensors +3 -0
- layers.1.mlp/cfg.json +1 -0
- layers.1.mlp/sae.safetensors +3 -0
- layers.10.mlp/cfg.json +1 -0
- layers.10.mlp/sae.safetensors +3 -0
- layers.11.mlp/cfg.json +1 -0
- layers.11.mlp/sae.safetensors +3 -0
- layers.12.mlp/cfg.json +1 -0
- layers.12.mlp/sae.safetensors +3 -0
- layers.13.mlp/cfg.json +1 -0
- layers.13.mlp/sae.safetensors +3 -0
- layers.14.mlp/cfg.json +1 -0
- layers.14.mlp/sae.safetensors +3 -0
- layers.15.mlp/cfg.json +1 -0
- layers.15.mlp/sae.safetensors +3 -0
- layers.2.mlp/cfg.json +1 -0
- layers.2.mlp/sae.safetensors +3 -0
- layers.3.mlp/cfg.json +1 -0
- layers.3.mlp/sae.safetensors +3 -0
- layers.4.mlp/cfg.json +1 -0
- layers.4.mlp/sae.safetensors +3 -0
- layers.5.mlp/cfg.json +1 -0
- layers.5.mlp/sae.safetensors +3 -0
- layers.6.mlp/cfg.json +1 -0
- layers.6.mlp/sae.safetensors +3 -0
- layers.7.mlp/cfg.json +1 -0
- layers.7.mlp/sae.safetensors +3 -0
- layers.8.mlp/cfg.json +1 -0
- layers.8.mlp/sae.safetensors +3 -0
- layers.9.mlp/cfg.json +1 -0
- layers.9.mlp/sae.safetensors +3 -0
- lr_scheduler_0.pt +3 -0
- optimizer_0.pt +3 -0
- rank_0_state.pt +3 -0
- rank_1_state.pt +3 -0
- rank_2_state.pt +3 -0
- rank_3_state.pt +3 -0
- state.pt +3 -0
config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "loss_fn": "fvu", "kl_coeff": 1.0, "filter_bos": true, "remove_transcoded_modules": false, "optimizer": "adam", "lr": 0.0002, "lr_warmup_steps": 50, "b1": 0.9, "b2": 0.999, "force_lr_warmup": false, "k_decay_steps": 0, "k_anneal_mul": 10, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "feature_link_l1": 0.0, "feature_link_batch": 4096, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp"], "hookpoints_in": [], "init_seeds": [0], "layers": [], "per_layer_k": [], "layer_stride": 1, "cross_layer": 0, "grad_scaler": false, "tp": 4, "save_every": 1000, "save_best": false, "finetune": null, "restart_epoch": false, "log_to_wandb": true, "run_name": "llama-sweep/bs32_lr2e-4_none_ef64_k32", "wandb_log_frequency": 1, "save_dir": "checkpoints", "model": "meta-llama/Llama-3.2-1B", "dataset": "EleutherAI/SmolLM2-135M-10B", "split": "train", "ctx_len": 128, "return_overflowed_tokens": true, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
|
layers.0.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.0.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40aeb375d38629e08c99b99d2448b9d5098026639185ac2f753b75c60645e27d
|
3 |
+
size 2165842688
|
layers.1.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.1.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:deec7b1da06fde45ac5bd40e71144dd900d77a2d3ea3711cfdb5011085dffaed
|
3 |
+
size 2165842688
|
layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.10.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2876138e551dbee8459b9c2e04fefcb97bceff309b1daf33431ce014fafaf3a5
|
3 |
+
size 2165842688
|
layers.11.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.11.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:463268e684891fd6003adcf6dcab4bd37561dd802616888045cb237253280d83
|
3 |
+
size 2165842688
|
layers.12.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.12.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d208c2d52db1e670cfa65ad4e27757429268818880468969428da9bdf07c571d
|
3 |
+
size 2165842688
|
layers.13.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.13.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5205fe3d408bdac1f281109bf6229d7088196b36d22fe2364d38a5e418fdcfb0
|
3 |
+
size 2165842688
|
layers.14.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.14.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1cf4ddf0c3987867ef1404566bc6b94092e23f87430644ffd25c5848ca543a0
|
3 |
+
size 2165842688
|
layers.15.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.15.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2971d2721c6f78c7c0a9187ccae84522cb5d690006bbec6407abc11148a40cef
|
3 |
+
size 2165842688
|
layers.2.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.2.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40495622a48ea41d9d54498cf152e6e035a0f6c1b46e6e0a2e2f05924d045a4d
|
3 |
+
size 2165842688
|
layers.3.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.3.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1c95d0da9328a4a3381c825fb32954e1d26ef7b02589b9c5516de8bb86e2161
|
3 |
+
size 2165842688
|
layers.4.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.4.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4374045e590d75cbfeb2e5cb27d6405a5afea024f805bbd9e526db761e72dbc9
|
3 |
+
size 2165842688
|
layers.5.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.5.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b75ff4a1f01e6d654ab97bf466f33ca7af3c6baebc520e7f588ac756fed578d7
|
3 |
+
size 2165842688
|
layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.6.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:446923f5b017c8bd5f2cfb6016fa504f89b0e82fdf5c6d5e565ebc60a582e071
|
3 |
+
size 2165842688
|
layers.7.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.7.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d46b5e382ea95c66da95d000c834241c719125ca9d55bcee4657c14afd4862c4
|
3 |
+
size 2165842688
|
layers.8.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.8.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f04b66b0adc54e4951a01ced619ec237cfe321782a4d8e997ebaec1d02e3283e
|
3 |
+
size 2165842688
|
layers.9.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}
|
layers.9.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4da543049cbffae08902e9c5fe457ed68225d0d24cb6015e56d1f479a3e17d6
|
3 |
+
size 2165842688
|
lr_scheduler_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f0c644e6d5a0cfd72a193c1e92509b5998cd971f8a28660c154f96f768c3475
|
3 |
+
size 9229
|
optimizer_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c37a84d4ac9255efe9c5c4a5cc442a211bd288666fd7dd780022758aadd28268
|
3 |
+
size 69307063487
|
rank_0_state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e48205e1a0641ff0ee79ff94a138dc8432c20643a3b1d351a0357aeaf27ebd
|
3 |
+
size 16783265
|
rank_1_state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cf3c1fd767951f9f049e99cba92f1cc72fcb6b67da81c3f2559d8e93c9f850e
|
3 |
+
size 16783265
|
rank_2_state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e2655a50000142ec519b05ec502db71d4d623463855dc0e2782de778d179ea9
|
3 |
+
size 16783265
|
rank_3_state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57f22b2acd5f9070748a4bb2055bef33037148ec7dc04f0c3b3d61c3392b464b
|
3 |
+
size 16783265
|
state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2259d3ee6fffebb7b654f9ff5bad598ef35dd9cc0eed4134872bdc9afbfebe2e
|
3 |
+
size 1249
|