Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- config.json +63 -0
- global_step116760/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- global_step116760/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
- global_step116760/mp_rank_00_model_states.pt +3 -0
config.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/fs/archive/share/yulan/data/aa_mini/output/miniyulan-2B-final-stage12-from-107-5k/checkpoint-116500",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"MiniYuLanModelForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": true,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"dim_model_base": 1920,
|
| 10 |
+
"dim_model_base_attn": 64,
|
| 11 |
+
"dim_model_base_init": null,
|
| 12 |
+
"dim_model_base_lmh": 1,
|
| 13 |
+
"dim_model_base_logits": 1920.0,
|
| 14 |
+
"dim_model_base_lr": 256.0,
|
| 15 |
+
"down_proj_alpha": 0.03450327796711771,
|
| 16 |
+
"embed_tokens_alpha": 1,
|
| 17 |
+
"embedding_ln": false,
|
| 18 |
+
"embedding_rmsln": false,
|
| 19 |
+
"eos_token_id": 2,
|
| 20 |
+
"gate_up_proj_alpha": 0.3651483716701107,
|
| 21 |
+
"gradient_checkpointing_step": 11,
|
| 22 |
+
"hidden_act": "silu",
|
| 23 |
+
"hidden_size": 1920,
|
| 24 |
+
"hidden_states_shrink": 0.18708286933869706,
|
| 25 |
+
"init_scale_o": 1,
|
| 26 |
+
"initializer_range": 5e-05,
|
| 27 |
+
"input_layernorm_alpha": 1.0,
|
| 28 |
+
"intermediate_size": 4800,
|
| 29 |
+
"k_proj_alpha": 0.3651483716701107,
|
| 30 |
+
"layer_norm_eps": 1e-06,
|
| 31 |
+
"lm_head_alpha": 1.0,
|
| 32 |
+
"ln_scale": 1,
|
| 33 |
+
"max_position_embeddings": 4096,
|
| 34 |
+
"model_reproduce": "transformer",
|
| 35 |
+
"model_type": "miniyulan",
|
| 36 |
+
"norm_alpha": 1.0,
|
| 37 |
+
"num_attention_heads": 30,
|
| 38 |
+
"num_hidden_layers": 56,
|
| 39 |
+
"num_key_value_heads": 6,
|
| 40 |
+
"o_proj_alpha": 0.03450327796711771,
|
| 41 |
+
"post_attention_layernorm_alpha": 1.0,
|
| 42 |
+
"q_proj_alpha": 0.3651483716701107,
|
| 43 |
+
"qk_layernorm": false,
|
| 44 |
+
"rms_norm_eps": 1e-06,
|
| 45 |
+
"rms_type": "llama",
|
| 46 |
+
"rope_scaling": null,
|
| 47 |
+
"rope_theta": 10000.0,
|
| 48 |
+
"scale_emb": 10.0,
|
| 49 |
+
"shrink_alpha": 1,
|
| 50 |
+
"sliding_window": null,
|
| 51 |
+
"tie_word_embeddings": true,
|
| 52 |
+
"torch_dtype": "bfloat16",
|
| 53 |
+
"transformers_version": "4.44.0",
|
| 54 |
+
"use_cache": false,
|
| 55 |
+
"use_emb_alpha": true,
|
| 56 |
+
"use_liger": true,
|
| 57 |
+
"use_norm_alpha": true,
|
| 58 |
+
"use_sliding_window": false,
|
| 59 |
+
"v_proj_alpha": 0.3651483716701107,
|
| 60 |
+
"vocab_size": 99000,
|
| 61 |
+
"wesar_weights": true,
|
| 62 |
+
"z_loss": 0.0001
|
| 63 |
+
}
|
global_step116760/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaca1399dac290fc03e6829edef44a9362d93de78c90ac2499e0ea0d0f7cbb14
|
| 3 |
+
size 558554482
|
global_step116760/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e99ff3b049a055acc127a8dabfc63a3d433afda301f1431c0fa45545e5a9777
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dafc79889f21796f36f7fa9954d8ce3efe224154c577ee98d4a4e57ee4e8ae0
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a99f1ddb336f501e56dd4bb79359f4514aa79fcf870e18cfc2dca6556eb9ebe1
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31093496a127eefca8ca7142187e70a4418f4c0b75e929432a668ce1dafb1182
|
| 3 |
+
size 558554434
|
global_step116760/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:441960af4cb65d75db43bc22b73557dfa1b1dd6bc59dcfe13cd9b36be2d1686d
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4904c523948a9a9406513120d206b1670a5680800587ae5f498a2e6ff908ae8d
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec4ac7ea5bbe50a85d78d84c32296e0dc9ab17b747c8262d6e65d05c905076a5
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0032b046785ff4399e5358f24afe7368c3825a582ff553a52c9488d42a1307ec
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dec56ffd5d2dd7717eecefe0c2e5d2fc03d5e0aee14defdc9ba8104b25e7f94
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e5d30d698f3622594e957a0f7571f284e1fb28bdfb5334bfa54790878426ace
|
| 3 |
+
size 558554434
|
global_step116760/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:026f158acff0572212bb2d56b9d0aeb744a4661925d63aa6cbb2e6916b90f0a4
|
| 3 |
+
size 558554418
|
global_step116760/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce2d43892196bc584147ed57eced3e69be7284e21cdc7b2c1294c901f8a3e244
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53fbe42dc019b4ea7654ada98b488bf8602e641522e1826eac9126a709af7a75
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c02b998e62119317dfd1fd894960f84d585e0377d3c6258566ff3e57c604825
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f870cfeaf56f93b1b9fdc90c1ac6a0b79c3d1179c5f721415529236d6aa2c782
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:079d72665bec001eb0a9fa401227979bee16d35c60edf9fa0825ecb8d9f42497
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee6cf11d961260938507240b9826453eab2cfe51cdae993e56207ea572ccb1ca
|
| 3 |
+
size 558554434
|
global_step116760/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2848d3169937e792fd1a79e97a1f2da0cafeefdce058a05c00b8bb9ad20d99e
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a9728f24e665efd5195e0008b8e8ffb4369cd639b968114b6acb5e2c6b6743e
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:754d1b81ee1e16076e1dbcd5aeb5ec7e5530decfb3cb62f38aca2e7115a9970c
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d2e79457118c01cd2a3cbe9974f41adeb93ea5613afe96bdb940939c36c5aa5
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c93fc597ba32980cde47ee7999d4a8c27683d3bf55af59b871f0697ac120570d
|
| 3 |
+
size 558554290
|
global_step116760/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1091deb6996e43a8f900ed8ed22f7f6a9db0a933efaa3648dcca977fb0be8b54
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aa8aa706279ab74447f5f8780d13f7d12dbfb360332bad6008f615d29045da4
|
| 3 |
+
size 558554434
|
global_step116760/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:312c98b3670b4b5fbbfabf1bd1e76f8d7603efb0fae15dc16a040c82fd48ff5a
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:126d983f6566df317efaed0990ef87e7b4b2f844cca32fbac79e75f432271729
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef61976be7a483344626b34e56eab3a34b006e0379dd296158da3c59037ddf45
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01ec91eabe773d1dbd0ea7f0b79d5aefa4045148a84976b9362f896aa32c8456
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e0746c712c0a53b67b833d13f10ab0046d3d3cd1f972d1ee4f62e8ab42967e9
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5374a5d0070552f63ca247743d61ef9f3d8a7b68321d3c6f04071902761da419
|
| 3 |
+
size 558554434
|
global_step116760/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7c917b250a05b31356618be20bcf0fbdf67192d93c604901d66ecae11eb3d06
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51c8bb555a96b5666ba79bbbf71849b41c161606601674d8bf998e55300dbd68
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bf56c489510884aad35a0b2411e663935e58358c3da17d626d8bb0d6060c710
|
| 3 |
+
size 558554354
|
global_step116760/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:031fbf44156c92c73da8b9070a802c2a24b8f61e48cd6f9e7f57829b8d642324
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df8db4b025b93bfa2d17667671fea80f6f93732b68060089b922beff98afbbe3
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dfabb455a0c5262789fbc4698def413ecca40d8b88e866fa6b20e3a5bf09dbf
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d208e73ec0827f9f12519f246bdd159e72af8cfdad2e1750d8a12483dc7f1290
|
| 3 |
+
size 558554434
|
global_step116760/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd1df2bf0b2d7358d296f449df96c1b244ecdbbbd40b00b956e956441de0ce27
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21cd0dcfe0faa0904bd2e463790ee1950ab1096630d93251d8d109b223ebb52e
|
| 3 |
+
size 558554370
|
global_step116760/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671a778126426a97e7010360948eb1caf69962357c931fe21b99bfa21031bcd8
|
| 3 |
+
size 558554306
|
global_step116760/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88c45b8b662279eb8c9b2f7128682bc63a6b7606471614d7a5bb25385cd6f77c
|
| 3 |
+
size 558610626
|
global_step116760/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3667b0729856b5c89fd68fc64bb7e97c3367aee187d206a18f885618d9211208
|
| 3 |
+
size 558554290
|
global_step116760/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30773ebe2f7d0dda768d52504c81fc9d1dbfa9cd102570a86a4948d73e13b0a7
|
| 3 |
+
size 558554290
|
global_step116760/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4055ca50948226701db5889b726f987f9014477c150f3f4f6057cbe23ec0820
|
| 3 |
+
size 558554354
|
global_step116760/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de4853950043243ea3246f7c62cd92c7f823651a697622918308bd2f992072aa
|
| 3 |
+
size 558554418
|
global_step116760/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4608bec0420f77b9553654b7e5c5dd6228cad2447a9a51d0382228f180679eb5
|
| 3 |
+
size 558554290
|
global_step116760/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb538a258e1347ae164257b00fb6c11cdf6b3d349fba73313e83a7b9be6decd6
|
| 3 |
+
size 558554354
|
global_step116760/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ae2c3a9376d4944667e65cd39787349a27b619b81544f646f94a0ab38800184
|
| 3 |
+
size 4468641136
|