diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5575 @@ +{ + "metadata": { + "ParamSize": 405, + "ParamBytes": 6712005120.0, + "BitsPerParam": 4.063487610779596 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 81960960, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32016, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81960960, + "byteOffset": 0 + } + ], + "md5sum": "e90bf78868d2d7b96ffa855a88eef74d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "64a292af0e964de0c784d11725cf5f64" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c1bc739a2829f14d46c27320688aec8c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0de49f560087d344bfadec0eef355d40" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "86a067af9a39d2ec2810bc8963c80f98" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e7f308b4df025b6fd492a9f5833ca6ba" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6d2fa927d808305c0ec5c3b91d9b61ff" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25212160, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32016, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2561280, + "byteOffset": 0 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2561280 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 2571520 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 3677440 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 5889280 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 5899520 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 7128320 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 20235520 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20645120 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 20655360 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 21761280 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23973120 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 23983360 + } + ], + "md5sum": "59881f53335fbe4d15ba8d5091465ec1" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 81960960, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32016, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81960960, + "byteOffset": 0 + } + ], + "md5sum": "57c5f804dad7cc506e50eff7fc83947b" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a891cc95366c3b930df9cc07f9c1c5f3" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4b315365f99dea4b253ca883405e07c8" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d2a738663fdc0d93076297f25b62df9f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 20655360, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32016, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2561280, + "byteOffset": 13527040 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16088320 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16098560 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 17204480 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19416320 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 19426560 + } + ], + "md5sum": "72a7726d2110630a36c0fff45557a024" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3fc82446bf6db3443ba491428b116eea" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "eff2db1fe3e084ddc881892d6ecbf200" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "dfcb643fe992f5d96686ee43fd697c42" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c86a73b58b9d1fd84913cb4157f0b323" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "85510f8af33c679c87da8ed1ce825c20" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "c393a204a7447246cba98e5ca4ba2d64" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4af53babc54cdf634cfeed880e493b8d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "88e00a472dd1ff521e82dfd6d4e9cc73" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "23854538162d83b2894fc47e9a1a147c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a9e73b6fde85971ef039ac0d1d8064c1" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "81ace50d892fd186b1136db832e3df31" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c75f983190f033d4bf3b111e72a330a7" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "309ad94f2dfbcd850e9a6113e1cfca33" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7248392d4fc87585c87840d6c298753c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "aae6a524e90b12e0f574b23be28fd47a" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ebe0b3a97bf53e3c1d4f91500e93296" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "313489f055edb422900d6de2a91b8cca" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "74f253df25acb10200bd3893c182b62e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e9c5548716f66ad262f34f024ec9226f" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2572539555c102cf50982bd129b6a095" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c78717a7e56e6ea200086b74db356ef6" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "f482979f9481fef5784b31aac30bf3de" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "073d952078c25fec24a595d19f1cd3b9" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c2b8a797f1a44e5bf7cebcc249d7121e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ed6a4b0bfad603fc2c94a6e98719069a" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f029e37b4fc29e20fec21474d4ef9760" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "df57239709cf75970a0ae933a777cb94" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32839680, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 13516800 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 14755840 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15861760 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18073600 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 18083840 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19312640 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 32419840 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + } + ], + "md5sum": "2948fe47b0bebedc8ed59df1f631113f" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a538b3be7d9b4f036728485e08947ecf" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "29238bcdd4b291559e983b95b2621657" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "60eddedb10cd9d20d88e8a4f1484213e" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2fa5c4f46a58f9810809264d29336871" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ea3dd11e4fc0d2b9d39f4f0da79eef1e" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 22640640, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 1105920 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 3317760 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 3328000 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4556800 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 17664000 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18073600 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 18083840 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 19189760 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 21401600 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 21411840 + } + ], + "md5sum": "b83b80747294a7b1df6631e9f34a71fe" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8fef8eeffef93848934ce26e5fb01637" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "16a3b854bd9677d91108c01d18e3f55e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d50ff1ab19a31bd5a46179d88ab3275c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4ea0cfa06953a37c3b9ebf9afcf228cb" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "45a717f9b07313ea20d5e836f65b4f83" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "4ac591e728a0af710edba141607e593b" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "81babce9b38f679dab6a8c0c7581e9ee" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c2dce10a311461038e9e507d036c5e09" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5a5e3aa541c7a803ea1ed05a50f4e38b" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33413120, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 20305920 + } + ], + "md5sum": "1eab697cb7a7f34ab7191e5fb30501d8" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "faafee0e574b5534bef37ddb8e04ee69" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0646569828b29f7914241904c1f2bbf3" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "877eaa91ddf612920ead9d6f366ab0af" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ec4a82478b09c617fb4157b340a45efb" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "651894280a42562a5559f0dd7a8044e2" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "115dcc48bbac6423cc2ec55fa5639e14" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 23060480, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 0 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 409600 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 419840 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 1525760 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 3737600 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 3747840 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4976640 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 18083840 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18493440 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 18503680 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 19609600 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 21821440 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 21831680 + } + ], + "md5sum": "8536d275a0c2bb7a62462a3a0e27814b" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7e37f35243f1ed706e6487b4e6b06b2c" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "46612c1864d1f806b1a4141214e004e6" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e24c9f8e5cc6bc980c6d9481c5dea949" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6f055cd649e2b0714a78c71ddb106991" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "59d8e3c8334969bba08a913ab41f5710" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "ea6380c75920a73ebc43b1984b444996" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6dd84e813598b8be632473a89570ef07" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "489e741ad7fa4524424e081b84f33338" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "746cacae070383b4eeeadea441b04ec5" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "bdf856b13d31ea8d655047fb747c01a3" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "495b65008460d8662bf7ee4e8ec1d00a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ff53f4a3ef3164e63c8f942ab73c9b83" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9633450ab8f5df24e6d96776fe84faf7" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "46e5ca805281398a105a0d08ec9847d8" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a47e774f273e46fe2d49131b90170912" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "da07b19d324b169e0947d8b45af5f942" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "5e73594716ce8b583fa71a885fc19769" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3cf4872ab1d91faa014c953aaed6c4fe" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1cf774fa4d10dcf2f2b4764e4053982e" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b7e845b1404e3cd1081696c15cbfe853" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c5130f5120f44577cb6e856190b17d9e" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "3c7cfdf5e7a2e59162be984207f41e8e" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "922a1a0ca6efdc17ad6b38ad13922271" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "90d1cda1e14d0642a886fbda99e24623" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6fc1e84f9066a5f660b48975a74a68db" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2b4ce30ca32245de9b6fd2cc6a95e77c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 31600640, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + } + ], + "md5sum": "c6bb93bd26013e24d053a413510d9679" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "98f07a2dd3fa3cefb048133f1dde9072" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5e989e6310ac6587d5cd0c9c66ca4fe9" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c9dd2ac88fa6b09904bafe41d4fc0fcc" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ca78faf5782caac585805bebb4375cb7" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8d50065b0d9b07b754090900fa1cfa04" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 22650880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2211840 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3440640 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16547840 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16957440 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16967680 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18073600 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18083840 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 18094080 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 19200000 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 21411840 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 21422080 + } + ], + "md5sum": "7b855f5622c6ac6bbea1fd87f14d0c2d" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "0e85b76d95c0f643d69a5df1fd417ef3" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4f620770451b64eefe36a3761230c129" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "139ff784cd520ebc8c5b317e0e3f0c35" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a8d9d6c2d1108b7d2cf148bae35b3c9c" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a9ed93a2a3a01c8f37bb6a072d74094f" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "37d04a49d3d78baf0c0c5ad0b6bcb265" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "99b44327a3cbe9a1ff5e274b82e5467f" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a3e592998d9a33cde53b4b31f004b651" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a2a7e847fdc4ff49455ecc162ee8b60a" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b001b12eecd22173d4cbd7b4aca4f1dc" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "2e0b2e01a9be43af682e337d75453387" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5621c03faa85ef7b5bb01073c0602bc9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4d00ae90a7b96aae481a1760a9a08764" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "074b139e6c215f454f7f1aff2cba3ec4" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e142dadcd34be3923908ec4d808f9960" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fe6ded124f8473cf9911c7ef0333d51a" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "70ee662d5bf24368569d1086ca7ad023" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4e9a8582910ffa31666fa7eb166cba2c" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5e45e6df042d604b9eec96a307a52d71" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "913911b0c5e5f72a4cf9b3ba734fd985" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8b84e1d13148fdce83d0b4778ac57289" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "3c67bf0e572ec14a90f67d43a38fc3e3" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "05d6e4417836db8b6d88e9a255a1ffd7" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "912a9511034fed9c5e33232b86524e23" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "10c0e7cbd9ec3f69d088d44da03481ee" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "62b3023dc8def599d38d241d76ff60ea" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "92562549c56e46825f4e509497b701cf" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 13516800 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15861760 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18073600 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "97e0174d2f95b6ebe288f8ab847296d6" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2619c239571057a85e329a9a475b4804" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6dc3f0c1c8dce1034c87aad4146bc63d" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "92cdba392fa01431472e9ffb0319374a" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "fe3c900d66cba3bd84ba101d096e9223" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "e0d6485087f60482fce74cc95471d2ff" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "eb048f5e1341d7da8bd8e1c3f349a186" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "da7e7429b6498ed72c9ea3928d6536a4" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8e8c7539bb238f47544718cb060fea86" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "cf9c0086bf5e74998d3ec240ccb5df06" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b8c401634b19736037895d0dda65552d" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "84b5097bd4c942d797876d659fc0a5d1" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6bf80847b6bb94fc6241590681218b6e" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c0277fc14b3a022480310413a7aa1093" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dcd2e4b7beb76eb91f46728a2b771a53" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9161aebca968854530138bf7d06ee363" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 21534720, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16967680 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16977920 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18083840 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20295680 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 20305920 + } + ], + "md5sum": "6f20caa037093e937d558290c85227a7" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ea5c24e0d6e4b4f0ba958960fccaf2cc" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4dfa191cb0e0861d86637522d9fbd4d0" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4e5ecb68773609d2e4a64d01d246cd65" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c85988476cdce64214263511235f03a2" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "464d6c29f98fc92ff79face1520a6671" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 32716800, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 16855040 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18083840 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31191040 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31600640 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 108 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 31610880 + } + ], + "md5sum": "21f932255e97fabfe658ccb4aee8fda9" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1a0143469152bccf47f74cdcf5f8e6de" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 16967680, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 2222080 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 3450880 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 16558080 + } + ], + "md5sum": "29fc17d766e0d9fe9438e6e25d8d54bd" + } + ] +} \ No newline at end of file