afg1 committed on
Commit
88816b1
·
verified ·
1 Parent(s): f85c0ab

Upload folder using huggingface_hub

Browse files
layer_2/k_128/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:606bb3fd1f5583f4dda6a2b605a6e463837e7f29d2349aec1b23023f5fc38da8
3
+ size 118091993
layer_2/k_128/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 1e-05,
6
+ "steps": 10000,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 500,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": null,
13
+ "activation_dim": 480,
14
+ "dict_size": 30720,
15
+ "k": 128,
16
+ "device": "cuda",
17
+ "layer": 2,
18
+ "lm_name": "yangheng/PlantRNA-FM",
19
+ "wandb_name": "plantrnafm-layer-2-k-128_trainer_0",
20
+ "submodule_name": null
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 480,
24
+ "io": "out",
25
+ "n_ctxs": 100,
26
+ "ctx_len": 128,
27
+ "refresh_batch_size": 64,
28
+ "out_batch_size": 1024,
29
+ "device": "cuda"
30
+ }
31
+ }
layer_2/k_256/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32237c44bf01cde70658bc7a89ecb0a6335ba3192ba67fd87874baa4cc3eb955
3
+ size 118091993
layer_2/k_256/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 1e-05,
6
+ "steps": 10000,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 500,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": null,
13
+ "activation_dim": 480,
14
+ "dict_size": 30720,
15
+ "k": 256,
16
+ "device": "cuda",
17
+ "layer": 2,
18
+ "lm_name": "yangheng/PlantRNA-FM",
19
+ "wandb_name": "plantrnafm-layer-2-k-256_trainer_0",
20
+ "submodule_name": null
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 480,
24
+ "io": "out",
25
+ "n_ctxs": 100,
26
+ "ctx_len": 128,
27
+ "refresh_batch_size": 64,
28
+ "out_batch_size": 1024,
29
+ "device": "cuda"
30
+ }
31
+ }
layer_2/k_32/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0dfb1b820cf8fad633445faa388ba84ebd1f025ae6e42f72b30d73885a186df
3
+ size 118091993
layer_2/k_32/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 1e-05,
6
+ "steps": 10000,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 500,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": null,
13
+ "activation_dim": 480,
14
+ "dict_size": 30720,
15
+ "k": 32,
16
+ "device": "cuda",
17
+ "layer": 2,
18
+ "lm_name": "yangheng/PlantRNA-FM",
19
+ "wandb_name": "plantrnafm-layer-2-k-32_trainer_0",
20
+ "submodule_name": null
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 480,
24
+ "io": "out",
25
+ "n_ctxs": 100,
26
+ "ctx_len": 128,
27
+ "refresh_batch_size": 64,
28
+ "out_batch_size": 1024,
29
+ "device": "cuda"
30
+ }
31
+ }
layer_2/k_512/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ae55d82cc9f07a0ddb209eb598444dada29e2091b7840a8dc97ddedecbef8e
3
+ size 118091993
layer_2/k_512/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 1e-05,
6
+ "steps": 10000,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 500,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": null,
13
+ "activation_dim": 480,
14
+ "dict_size": 30720,
15
+ "k": 512,
16
+ "device": "cuda",
17
+ "layer": 2,
18
+ "lm_name": "yangheng/PlantRNA-FM",
19
+ "wandb_name": "plantrnafm-layer-2-k-512_trainer_0",
20
+ "submodule_name": null
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 480,
24
+ "io": "out",
25
+ "n_ctxs": 100,
26
+ "ctx_len": 128,
27
+ "refresh_batch_size": 64,
28
+ "out_batch_size": 1024,
29
+ "device": "cuda"
30
+ }
31
+ }
layer_2/k_64/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e23245d31fe1f7d11fcfb998a0d974c5527053184b42846522c1e6151fb07976
3
+ size 118091993
layer_2/k_64/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 1e-05,
6
+ "steps": 10000,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 500,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": null,
13
+ "activation_dim": 480,
14
+ "dict_size": 30720,
15
+ "k": 64,
16
+ "device": "cuda",
17
+ "layer": 2,
18
+ "lm_name": "yangheng/PlantRNA-FM",
19
+ "wandb_name": "plantrnafm-layer-2-k-64_trainer_0",
20
+ "submodule_name": null
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 480,
24
+ "io": "out",
25
+ "n_ctxs": 100,
26
+ "ctx_len": 128,
27
+ "refresh_batch_size": 64,
28
+ "out_batch_size": 1024,
29
+ "device": "cuda"
30
+ }
31
+ }