numen-tech's picture
Add weights
bd5fefb
{
"metadata": {
"ParamSize": 225,
"ParamBytes": 567349248.0,
"BitsPerParam": 4.069983930948992
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "compressed-shard",
"nbytes": 32768000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 32768000,
"byteOffset": 0
}
],
"md5sum": "1be665a639d190f61c5d34e276a7c6cf"
},
{
"dataPath": "params_shard_1.bin",
"format": "compressed-shard",
"nbytes": 32768000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 32768000,
"byteOffset": 0
}
],
"md5sum": "f78e925e4837b0592071618d3da8c979"
},
{
"dataPath": "params_shard_2.bin",
"format": "compressed-shard",
"nbytes": 30715904,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024000,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024000,
"byteOffset": 1024000
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2048000
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 2052096
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 7819264
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 7999488
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 19533824
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19894272
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 19898368
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 22519808
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 22601728
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 24698880
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 24764416
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 24768512
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 30535680
}
],
"md5sum": "9c45b2252b5f35f1fad65bec2e691a79"
},
{
"dataPath": "params_shard_3.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "c717d2c0b063c591e7d229aa6325a1e8"
},
{
"dataPath": "params_shard_4.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "76aeab71aa1968bc2cceb2100883eba4"
},
{
"dataPath": "params_shard_5.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "e9e75d24ff2b3db048721abab01b3288"
},
{
"dataPath": "params_shard_6.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "357d152d84ac3f3aba95f6a3cf6913d4"
},
{
"dataPath": "params_shard_7.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "dfcada232de0bc083b24c230c662ff13"
},
{
"dataPath": "params_shard_8.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "b038328ec6827fda8d0824b2726d4e83"
},
{
"dataPath": "params_shard_9.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "63e9c1b3f3e58aa1ea95e349c717bfee"
},
{
"dataPath": "params_shard_10.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "0ea9d4c782467632011b653e67b52fb4"
},
{
"dataPath": "params_shard_11.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "4f281e72fe863ffafda22d3977c777a2"
},
{
"dataPath": "params_shard_12.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "9fdf9c046972a3ae56bc03c3f6211390"
},
{
"dataPath": "params_shard_13.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "91db6e0a950f38ed6340b3591ecfd736"
},
{
"dataPath": "params_shard_14.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "03fe8c87efcdf162fe63f0685c6ca204"
},
{
"dataPath": "params_shard_15.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "db99284e6fe20a57914f979ef91fc411"
},
{
"dataPath": "params_shard_16.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "c3ebafd37d1703733176f92873e72043"
},
{
"dataPath": "params_shard_17.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "29cee1e9fca2f666d84b4247626343f4"
},
{
"dataPath": "params_shard_18.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "ea03cfb1779a1bc71bdd48ffbf3c4eba"
},
{
"dataPath": "params_shard_19.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "cbb746c818d5044c607f1122eb155765"
},
{
"dataPath": "params_shard_20.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "8e893e4ebeafc8c95d1844aaf69198f3"
},
{
"dataPath": "params_shard_21.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "7df98656805afd3173490dfea056bfeb"
},
{
"dataPath": "params_shard_22.bin",
"format": "compressed-shard",
"nbytes": 22716416,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2048,
704
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 16769024
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 22536192
}
],
"md5sum": "70548e152d72be1ef0ba090d8d4f4139"
},
{
"dataPath": "params_shard_23.bin",
"format": "compressed-shard",
"nbytes": 16769024,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
11264,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
11264,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 11534336
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11894784
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 11898880
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
2560,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 81920,
"byteOffset": 14520320
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14602240
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16699392
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16764928
}
],
"md5sum": "8755c2f041d28b45a10cada300c4d451"
}
]
}