{
  "cache_cfg": {
    "dataset_repo": "EleutherAI/fineweb-edu-dedup-10b",
    "dataset_split": "train",
    "dataset_name": "",
    "dataset_column": "text",
    "batch_size": 16,
    "cache_ctx_len": 256,
    "n_tokens": 10000000,
    "n_splits": 5
  },
  "constructor_cfg": {
    "faiss_embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
    "faiss_embedding_cache_dir": ".embedding_cache",
    "faiss_embedding_cache_enabled": true,
    "example_ctx_len": 32,
    "min_examples": 200,
    "n_non_activating": 50,
    "center_examples": true,
    "non_activating_source": "random",
    "neighbours_type": "co-occurrence"
  },
  "sampler_cfg": {
    "n_examples_train": 40,
    "n_examples_test": 50,
    "n_quantiles": 10,
    "train_type": "quantiles",
    "test_type": "quantiles",
    "ratio_top": 0.2
  },
  "model": "meta-llama/Llama-3.2-1B",
  "sparse_model": "results/llama-mntss-relu",
  "hookpoints": [
    "layers.0.mlp",
    "layers.1.mlp",
    "layers.2.mlp",
    "layers.3.mlp",
    "layers.4.mlp",
    "layers.5.mlp",
    "layers.6.mlp",
    "layers.7.mlp",
    "layers.8.mlp",
    "layers.9.mlp",
    "layers.10.mlp",
    "layers.11.mlp",
    "layers.12.mlp",
    "layers.13.mlp",
    "layers.14.mlp",
    "layers.15.mlp"
  ],
  "explainer_model": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
  "explainer_model_max_len": 5120,
  "explainer_provider": "offline",
  "name": "transcoder_llama_131k_mntss",
  "max_latents": null,
  "filter_bos": false,
  "log_probs": false,
  "load_in_8bit": false,
  "hf_token": null,
  "pipeline_num_proc": 48,
  "num_gpus": 1,
  "seed": 22,
  "verbose": true,
  "num_examples_per_scorer_prompt": 5,
  "overwrite": []
}