Upload folder using huggingface_hub
Browse files
- layer_2/k_128/trainer_0/ae.pt +3 -0
- layer_2/k_128/trainer_0/config.json +31 -0
- layer_2/k_256/trainer_0/ae.pt +3 -0
- layer_2/k_256/trainer_0/config.json +31 -0
- layer_2/k_32/trainer_0/ae.pt +3 -0
- layer_2/k_32/trainer_0/config.json +31 -0
- layer_2/k_512/trainer_0/ae.pt +3 -0
- layer_2/k_512/trainer_0/config.json +31 -0
- layer_2/k_64/trainer_0/ae.pt +3 -0
- layer_2/k_64/trainer_0/config.json +31 -0
layer_2/k_128/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:606bb3fd1f5583f4dda6a2b605a6e463837e7f29d2349aec1b23023f5fc38da8
|
| 3 |
+
size 118091993
|
layer_2/k_128/trainer_0/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 1e-05,
|
| 6 |
+
"steps": 10000,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 500,
|
| 9 |
+
"decay_start": null,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": null,
|
| 13 |
+
"activation_dim": 480,
|
| 14 |
+
"dict_size": 30720,
|
| 15 |
+
"k": 128,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 2,
|
| 18 |
+
"lm_name": "yangheng/PlantRNA-FM",
|
| 19 |
+
"wandb_name": "plantrnafm-layer-2-k-128_trainer_0",
|
| 20 |
+
"submodule_name": null
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 480,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 100,
|
| 26 |
+
"ctx_len": 128,
|
| 27 |
+
"refresh_batch_size": 64,
|
| 28 |
+
"out_batch_size": 1024,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
layer_2/k_256/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32237c44bf01cde70658bc7a89ecb0a6335ba3192ba67fd87874baa4cc3eb955
|
| 3 |
+
size 118091993
|
layer_2/k_256/trainer_0/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 1e-05,
|
| 6 |
+
"steps": 10000,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 500,
|
| 9 |
+
"decay_start": null,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": null,
|
| 13 |
+
"activation_dim": 480,
|
| 14 |
+
"dict_size": 30720,
|
| 15 |
+
"k": 256,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 2,
|
| 18 |
+
"lm_name": "yangheng/PlantRNA-FM",
|
| 19 |
+
"wandb_name": "plantrnafm-layer-2-k-256_trainer_0",
|
| 20 |
+
"submodule_name": null
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 480,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 100,
|
| 26 |
+
"ctx_len": 128,
|
| 27 |
+
"refresh_batch_size": 64,
|
| 28 |
+
"out_batch_size": 1024,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
layer_2/k_32/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0dfb1b820cf8fad633445faa388ba84ebd1f025ae6e42f72b30d73885a186df
|
| 3 |
+
size 118091993
|
layer_2/k_32/trainer_0/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 1e-05,
|
| 6 |
+
"steps": 10000,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 500,
|
| 9 |
+
"decay_start": null,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": null,
|
| 13 |
+
"activation_dim": 480,
|
| 14 |
+
"dict_size": 30720,
|
| 15 |
+
"k": 32,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 2,
|
| 18 |
+
"lm_name": "yangheng/PlantRNA-FM",
|
| 19 |
+
"wandb_name": "plantrnafm-layer-2-k-32_trainer_0",
|
| 20 |
+
"submodule_name": null
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 480,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 100,
|
| 26 |
+
"ctx_len": 128,
|
| 27 |
+
"refresh_batch_size": 64,
|
| 28 |
+
"out_batch_size": 1024,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
layer_2/k_512/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69ae55d82cc9f07a0ddb209eb598444dada29e2091b7840a8dc97ddedecbef8e
|
| 3 |
+
size 118091993
|
layer_2/k_512/trainer_0/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 1e-05,
|
| 6 |
+
"steps": 10000,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 500,
|
| 9 |
+
"decay_start": null,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": null,
|
| 13 |
+
"activation_dim": 480,
|
| 14 |
+
"dict_size": 30720,
|
| 15 |
+
"k": 512,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 2,
|
| 18 |
+
"lm_name": "yangheng/PlantRNA-FM",
|
| 19 |
+
"wandb_name": "plantrnafm-layer-2-k-512_trainer_0",
|
| 20 |
+
"submodule_name": null
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 480,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 100,
|
| 26 |
+
"ctx_len": 128,
|
| 27 |
+
"refresh_batch_size": 64,
|
| 28 |
+
"out_batch_size": 1024,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|
layer_2/k_64/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e23245d31fe1f7d11fcfb998a0d974c5527053184b42846522c1e6151fb07976
|
| 3 |
+
size 118091993
|
layer_2/k_64/trainer_0/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "TopKTrainer",
|
| 4 |
+
"dict_class": "AutoEncoderTopK",
|
| 5 |
+
"lr": 1e-05,
|
| 6 |
+
"steps": 10000,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 500,
|
| 9 |
+
"decay_start": null,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"seed": null,
|
| 13 |
+
"activation_dim": 480,
|
| 14 |
+
"dict_size": 30720,
|
| 15 |
+
"k": 64,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"layer": 2,
|
| 18 |
+
"lm_name": "yangheng/PlantRNA-FM",
|
| 19 |
+
"wandb_name": "plantrnafm-layer-2-k-64_trainer_0",
|
| 20 |
+
"submodule_name": null
|
| 21 |
+
},
|
| 22 |
+
"buffer": {
|
| 23 |
+
"d_submodule": 480,
|
| 24 |
+
"io": "out",
|
| 25 |
+
"n_ctxs": 100,
|
| 26 |
+
"ctx_len": 128,
|
| 27 |
+
"refresh_batch_size": 64,
|
| 28 |
+
"out_batch_size": 1024,
|
| 29 |
+
"device": "cuda"
|
| 30 |
+
}
|
| 31 |
+
}
|