nev commited on Jul 8

Commit

41793f9

verified ·

1 Parent(s): 5b91908

Add files using upload-large-folder tool

Browse files

Files changed (40) hide show

config.json +1 -0
layers.0.mlp/cfg.json +1 -0
layers.0.mlp/sae.safetensors +3 -0
layers.1.mlp/cfg.json +1 -0
layers.1.mlp/sae.safetensors +3 -0
layers.10.mlp/cfg.json +1 -0
layers.10.mlp/sae.safetensors +3 -0
layers.11.mlp/cfg.json +1 -0
layers.11.mlp/sae.safetensors +3 -0
layers.12.mlp/cfg.json +1 -0
layers.12.mlp/sae.safetensors +3 -0
layers.13.mlp/cfg.json +1 -0
layers.13.mlp/sae.safetensors +3 -0
layers.14.mlp/cfg.json +1 -0
layers.14.mlp/sae.safetensors +3 -0
layers.15.mlp/cfg.json +1 -0
layers.15.mlp/sae.safetensors +3 -0
layers.2.mlp/cfg.json +1 -0
layers.2.mlp/sae.safetensors +3 -0
layers.3.mlp/cfg.json +1 -0
layers.3.mlp/sae.safetensors +3 -0
layers.4.mlp/cfg.json +1 -0
layers.4.mlp/sae.safetensors +3 -0
layers.5.mlp/cfg.json +1 -0
layers.5.mlp/sae.safetensors +3 -0
layers.6.mlp/cfg.json +1 -0
layers.6.mlp/sae.safetensors +3 -0
layers.7.mlp/cfg.json +1 -0
layers.7.mlp/sae.safetensors +3 -0
layers.8.mlp/cfg.json +1 -0
layers.8.mlp/sae.safetensors +3 -0
layers.9.mlp/cfg.json +1 -0
layers.9.mlp/sae.safetensors +3 -0
lr_scheduler_0.pt +3 -0
optimizer_0.pt +3 -0
rank_0_state.pt +3 -0
rank_1_state.pt +3 -0
rank_2_state.pt +3 -0
rank_3_state.pt +3 -0
state.pt +3 -0

config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"sae": {"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "loss_fn": "fvu", "kl_coeff": 1.0, "filter_bos": true, "remove_transcoded_modules": false, "optimizer": "adam", "lr": 0.0002, "lr_warmup_steps": 50, "b1": 0.9, "b2": 0.999, "force_lr_warmup": false, "k_decay_steps": 0, "k_anneal_mul": 10, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "feature_link_l1": 0.0, "feature_link_batch": 4096, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp"], "hookpoints_in": [], "init_seeds": [0], "layers": [], "per_layer_k": [], "layer_stride": 1, "cross_layer": 0, "grad_scaler": false, "tp": 4, "save_every": 1000, "save_best": false, "finetune": null, "restart_epoch": false, "log_to_wandb": true, "run_name": "llama-sweep/bs32_lr2e-4_none_ef64_k32", "wandb_log_frequency": 1, "save_dir": "checkpoints", "model": "meta-llama/Llama-3.2-1B", "dataset": "EleutherAI/SmolLM2-135M-10B", "split": "train", "ctx_len": 128, "return_overflowed_tokens": true, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "shuffle_seed": 42, "data_preprocessing_num_proc": 48}

layers.0.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"dtype": "none", "activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": true, "transcode": true, "tp_output": true, "n_targets": 0, "n_sources": 0, "normalize_io": true, "divide_cross_layer": false, "train_post_encoder": true, "post_encoder_scale": true, "per_source_tied": false, "coalesce_topk": "none", "topk_coalesced": false, "use_fp8": false, "d_in": 2048}

layers.0.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40aeb375d38629e08c99b99d2448b9d5098026639185ac2f753b75c60645e27d
+size 2165842688

layers.1.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.1.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deec7b1da06fde45ac5bd40e71144dd900d77a2d3ea3711cfdb5011085dffaed
+size 2165842688

layers.10.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.10.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2876138e551dbee8459b9c2e04fefcb97bceff309b1daf33431ce014fafaf3a5
+size 2165842688

layers.11.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.11.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:463268e684891fd6003adcf6dcab4bd37561dd802616888045cb237253280d83
+size 2165842688

layers.12.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.12.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d208c2d52db1e670cfa65ad4e27757429268818880468969428da9bdf07c571d
+size 2165842688

layers.13.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.13.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5205fe3d408bdac1f281109bf6229d7088196b36d22fe2364d38a5e418fdcfb0
+size 2165842688

layers.14.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.14.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1cf4ddf0c3987867ef1404566bc6b94092e23f87430644ffd25c5848ca543a0
+size 2165842688

layers.15.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.15.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2971d2721c6f78c7c0a9187ccae84522cb5d690006bbec6407abc11148a40cef
+size 2165842688

layers.2.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.2.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40495622a48ea41d9d54498cf152e6e035a0f6c1b46e6e0a2e2f05924d045a4d
+size 2165842688

layers.3.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.3.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1c95d0da9328a4a3381c825fb32954e1d26ef7b02589b9c5516de8bb86e2161
+size 2165842688

layers.4.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.4.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4374045e590d75cbfeb2e5cb27d6405a5afea024f805bbd9e526db761e72dbc9
+size 2165842688

layers.5.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.5.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b75ff4a1f01e6d654ab97bf466f33ca7af3c6baebc520e7f588ac756fed578d7
+size 2165842688

layers.6.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.6.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:446923f5b017c8bd5f2cfb6016fa504f89b0e82fdf5c6d5e565ebc60a582e071
+size 2165842688

layers.7.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.7.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d46b5e382ea95c66da95d000c834241c719125ca9d55bcee4657c14afd4862c4
+size 2165842688

layers.8.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.8.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f04b66b0adc54e4951a01ced619ec237cfe321782a4d8e997ebaec1d02e3283e
+size 2165842688

layers.9.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@

layers.9.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4da543049cbffae08902e9c5fe457ed68225d0d24cb6015e56d1f479a3e17d6
+size 2165842688

lr_scheduler_0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f0c644e6d5a0cfd72a193c1e92509b5998cd971f8a28660c154f96f768c3475
+size 9229

optimizer_0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c37a84d4ac9255efe9c5c4a5cc442a211bd288666fd7dd780022758aadd28268
+size 69307063487

rank_0_state.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e48205e1a0641ff0ee79ff94a138dc8432c20643a3b1d351a0357aeaf27ebd
+size 16783265

rank_1_state.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf3c1fd767951f9f049e99cba92f1cc72fcb6b67da81c3f2559d8e93c9f850e
+size 16783265

rank_2_state.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e2655a50000142ec519b05ec502db71d4d623463855dc0e2782de778d179ea9
+size 16783265

rank_3_state.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57f22b2acd5f9070748a4bb2055bef33037148ec7dc04f0c3b3d61c3392b464b
+size 16783265

state.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2259d3ee6fffebb7b654f9ff5bad598ef35dd9cc0eed4134872bdc9afbfebe2e
+size 1249