File size: 1,895 Bytes
6f58daf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{
    "cache_cfg": {
        "dataset_repo": "EleutherAI/fineweb-edu-dedup-10b",
        "dataset_split": "train",
        "dataset_name": "",
        "dataset_column": "text",
        "batch_size": 16,
        "cache_ctx_len": 256,
        "n_tokens": 10000000,
        "n_splits": 5
    },
    "constructor_cfg": {
        "faiss_embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        "faiss_embedding_cache_dir": ".embedding_cache",
        "faiss_embedding_cache_enabled": true,
        "example_ctx_len": 32,
        "min_examples": 200,
        "n_non_activating": 50,
        "center_examples": true,
        "non_activating_source": "random",
        "neighbours_type": "co-occurrence"
    },
    "sampler_cfg": {
        "n_examples_train": 40,
        "n_examples_test": 50,
        "n_quantiles": 10,
        "train_type": "quantiles",
        "test_type": "quantiles",
        "ratio_top": 0.2
    },
    "model": "meta-llama/Llama-3.2-1B",
    "sparse_model": "results/llama-mntss-relu",
    "hookpoints": [
        "layers.0.mlp",
        "layers.1.mlp",
        "layers.2.mlp",
        "layers.3.mlp",
        "layers.4.mlp",
        "layers.5.mlp",
        "layers.6.mlp",
        "layers.7.mlp",
        "layers.8.mlp",
        "layers.9.mlp",
        "layers.10.mlp",
        "layers.11.mlp",
        "layers.12.mlp",
        "layers.13.mlp",
        "layers.14.mlp",
        "layers.15.mlp"
    ],
    "explainer_model": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    "explainer_model_max_len": 5120,
    "explainer_provider": "offline",
    "name": "transcoder_llama_131k_mntss",
    "max_latents": null,
    "filter_bos": false,
    "log_probs": false,
    "load_in_8bit": false,
    "hf_token": null,
    "pipeline_num_proc": 48,
    "num_gpus": 1,
    "seed": 22,
    "verbose": true,
    "num_examples_per_scorer_prompt": 5,
    "overwrite": []
}