| { | |
| "metadata": { | |
| "ParamSize": 163, | |
| "ParamBytes": 635179008.0, | |
| "BitsPerParam": 4.111808426896466 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 131334144, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 128256, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131334144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "948d42e013589644f717b8f2742e703e" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 16777216, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cc0bf968818c6f4b270e2e22945d1eb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30107648, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 128256, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16416768, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 16416768 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16420864 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 24809472 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 24813568 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 24846336 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 24850432 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 27996160 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28002304 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30099456 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30103552 | |
| } | |
| ], | |
| "md5sum": "9e192813c3d7e96b6a4cbf5877bd9836" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "e93aca3dfa86a100ed4f111c34b5c0a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "7b4e0769d79ab90df5efe003aa51c259" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "a64b0da10bc0cf5b7552500279b8493b" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "881d9a891e6f65fcc938cedcac8d8452" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "0e367dc74ed7da25333139473cb41755" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "3e44a7b361cc6bd355dc515df19691a6" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "c49d41b4843774e14e6edf73267c114b" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "e59353e72d4e0016b38dd491c2e63dae" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "ae86125b67ec8b3658879fa112ef638c" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "0371fcee5e98a199735fe9f22b25d26d" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "e77942260c6c7f70253ab08c56e50118" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "1a4b67701fd5e1be8280ab9b07f05163" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "8d327674eb0ce5f2fb51a4b99157a49f" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "266c20fccdf14ac656058f8ea09672ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30464000, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 8192 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16777216, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32768, | |
| "byteOffset": 25169920 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 25202688 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1536 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 25206784 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28352512 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "int8", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 28358656 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30455808 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "1f27bc4b570063c2d92c0fe07fccad1a" | |
| } | |
| ] | |
| } |