{ "metadata": { "ParamSize": 867, "ParamBytes": 6619494912.0, "BitsPerParam": 4.3451667303689145 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 503439360, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 262208, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 503439360, "byteOffset": 0 } ], "md5sum": "712fd2bf03edf3f56fa2417a76ccce4b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 62929920, "records": [ { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 262208, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 62929920, "byteOffset": 0 } ], "md5sum": "53d948378b1e9a4703d25e934e460904" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "4a38bab24480940385a10987ddae625e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33185280, "records": [ { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 0 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 7680 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 29498880 } ], "md5sum": "fe9a9f1a9b52b46bd579946945e16d55" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.0.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.0.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "30d9abefd476d2bd43ac62b58ad1b1d8" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "a8215714b331b41264b9d6d9ff136357" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "e58281a60074315c6549a2e986843713" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.1.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.1.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "409bbfeaaad4b13c9ea2ef4799001099" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "df3716ae5c7342cb2b0996c410fb6086" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "445fbed34b5f0ae17fda18559ca586df" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.2.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.2.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "f6236471ec20d657a0cbb1d51889f984" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "26a93732cd312568ca250d082318391b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8905b563815c88dd685697e37a2dd79e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.3.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "a90213e5407ecbbc419ca3b90b7a8cc0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "bdeb067abc17a2c19d2f69eb35fc1b31" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33424384, "records": [ { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.3.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 22118912 }, { "name": "language_model.model.layers.4.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 29491712 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29492224 } ], "md5sum": "04eeb4259ec1a7bbec6f0348148c06c0" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "a621f37b2c35f788a0318a638451ee89" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "0c7fe9a4611ec05da053e24142c463f2" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26304512, "records": [ { "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 491520 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 8355840 }, { "name": "language_model.model.layers.4.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 9338880 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 9339392 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 17203712 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 18186752 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 22118912 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22610432 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22618112 } ], "md5sum": "dc563b2ddcf105a8b0f0673092b07b8f" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.10.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.10.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "4ebeb164faaf0ba8291b57b4c4438f8a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "e07e8b0c3b04fbd7910be27a8e1016d3" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "d6d23050a552897f5782193218451187" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.11.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.11.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "d90f9e7ce0dc12a8a2425f097d157361" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "dee770cfd4cbdb047800fbb1e6c44488" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ab04772b7383a8de7f76cf4b5d813db3" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.12.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.12.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "3b10ac95463d4b82b78fa62bab4ebd47" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "1b6d792524f531c87bed6543715cdf94" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "87107f94ab48dcee989396040444d30f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.13.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "3f79d1a977853e95fc2970705aae67e4" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "1d42d9e39494b3ca2d1dfe51f64ba0ee" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8c777b3c25714b64d42d5207c66518aa" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.13.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.14.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "7e8d623930b4d880f99f656c9f43192a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "a1def84a691f6486cc3d15da609dc9aa" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26542592, "records": [ { "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.14.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 } ], "md5sum": "38f5300f6e08e00cc7be142f015d4e6e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33424384, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.15.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7372800 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7373312 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11305472 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11796992 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19661312 }, { "name": "language_model.model.layers.15.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20644352 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20644864 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28509184 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29492224 } ], "md5sum": "7e0da19ff1bf8dc0027d434a08e36204" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "b8fb26a51246d29679670e7c05095aff" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33208320, "records": [ { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3686400 }, { "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3694080 }, { "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3701760 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3709440 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 3717120 } ], "md5sum": "e797415fa9ab38b33e50abfb460ea573" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "5318496ecbbf1327db0ceb2fb527f24a" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.5.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.5.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "8e7ea3766366a5f5b644dd4dff58e61b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "f5a40c953aa3021edab6720585765d6b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "3ec9d9a560acfeae774b8b81d8d119b3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.6.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.6.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "e202c920b4b8d88b222494c2ee44059b" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "14b094a795f316ef163f455bd77a659c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8b14b62c19300465197a74bf0837a6d1" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.7.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "96ea42961faf9e2c70e1154c8164d418" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "644777746d3e7349793303dde95edf2a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "02fab52264a8d3ae0979a4c5ed3733e4" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.7.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.8.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "9cf171b2bdafd47429331876da77cf01" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "3b8b298e932667505f8850a91d97d6e8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "cc9ce45613296e365478d077f8400cef" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.8.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "9960cd47ab0a2af93729cf6005da5075" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.9.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.9.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "0565bd49f154e68bd37a3e6de292e2b0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "ecbb3d6e5f42a98c01de350265169bc8" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33208320, "records": [ { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3686400 }, { "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3694080 }, { "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3701760 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 3709440 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 3717120 } ], "md5sum": "1ec7f185a993046bec96bda219521612" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "73747cc81445442f55b06c3c72a01890" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.16.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.16.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "37e56035215fb7c240c943e885447747" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "313d941e1471d87168fe8ea674564956" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f5ece1750c376c2dda76d6f8a7ff349f" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.17.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.17.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "a04e8db9792636e9472281ba50415504" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.18.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "69c61fa327e95b5fc26cb9a9e950c32e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "c18a9906a1dfe2edf1dd974cd66e1167" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.18.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "329545b54d2346b1896cac2d0abf679e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.19.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "d3d93cfa507b5154a3c8c29657e2937a" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "df8e790716f5e9057d39a6e990652537" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.18.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.19.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "eb9af13b8866de1e6ca34f29db8dd668" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.20.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "77f01bd36acd73f0a4109d4e432d7354" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ba5143072f4dcc0ccfc46cd041342faa" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.19.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "74938eee2bbd559f282f84ca8f3a9c43" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.20.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.20.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "d58115f4b4827e8754c16a6224b13736" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "4d26137794f1f36036c6b5897858ff23" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "285e1d2d445b4576bc8d32669d9fce02" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.21.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.21.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.21.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "21dff8d2a47f571e17947fbf2ba1f783" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.22.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "77746c7353b489872e9f29919420dc14" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f12f0e5f3fc0a08e75108d82ffdf375c" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.22.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.22.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "d5a2093bb13dca67ba4eea9a9a20a31d" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "4822d327eb2b183a457e983cdbc11cbd" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "63d567f5a9c6e71ff1d8e5a2390b4c68" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.23.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.23.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "3cfd0dd16a44ca145562c6d333644594" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.24.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "b18c3025bd302466bbae1d01781f0083" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "61f9d5ce2772ebcb0ecbef29ef96ea7a" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.23.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.24.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "fb91f28653279d0ec2addd8ed5c5981a" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.25.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "28fa48ab7e0741b2990cb90585ef5a1d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "e1a1d57c58a78dcb653f5d1231d572c2" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.24.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "aa026a59401a32c2c97a2348169bf410" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.25.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.25.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "1fd534062ab3c0daa668383a9d52d8c2" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "c4fe342088dffea012ef5104b676a4b4" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29983744, "records": [ { "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 491520 }, { "name": "language_model.model.layers.26.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7864320 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7864832 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11796992 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 12288512 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 20152832 }, { "name": "language_model.model.layers.26.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 21135872 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 21136384 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 29000704 } ], "md5sum": "c44f514505588fb51a43192d6a18aea2" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.26.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "11d039bff7263fcf404a8bd02120b00a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.27.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "5d23d82d69048d083afe8411bad76527" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "3b03db1b2ea2499103d9bd77d957e7df" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32502784, "records": [ { "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 8117760 }, { "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 8125440 }, { "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 8133120 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 8140800 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 8148480 }, { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 11834880 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 19207680 }, { "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 19215360 }, { "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 19223040 }, { "name": "language_model.model.layers.27.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 19230720 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 19231232 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 23163392 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 23654912 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 31519232 }, { "name": "language_model.model.layers.27.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 32502272 } ], "md5sum": "e6e46caa5021e6fac6935075739849a4" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.28.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "0558734b87259bd470d6ee0c141784ec" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "34c58169ccd4f63836be3fee80be096a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.28.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "82c63b47b57aee8166c06b62c64fd924" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.29.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "9045e141269c0cb0b5ec5b92494e9cff" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "1f8353d524e380708dedeec848746408" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.28.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.29.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "c724478f601a39e643f1bbbb1fd7b452" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.30.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "c8f548e896656d4e4cde3292ecfbcb3a" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "7191d7d83ba5495cb23fec6014a75cfc" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.29.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "01a51d0d055b9005fddf30e5b9782e59" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.30.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.30.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "eeb596d051a965a0b6eedd86a0edf23f" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "487bd65dcb405026b157ff9bbfeb09e6" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "2fd6ae7a6282648798f5ccd57cfb7582" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.31.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.31.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.31.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "12cb120759129740ab2a57a7e020bd0d" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.32.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "5ca2107242a75db0083b9cdcdf4a2c5c" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "bef6cb576fe659bdebd4e536a8e97ac7" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.32.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.32.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.32.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.32.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.32.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "de886af92142ffe1d54261487dfacecf" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.33.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "b9e9635755da59634f45e74dc912752f" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "1db1891bfe00dfd2558aed67af643ad3" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.33.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.33.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.33.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.33.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "933a01097d719b586a11a969ffe63a8a" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.34.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "6a6cec8a9525b6d0e1899be8653d4f9d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "da2536a26a3dcbeaec29c9d71743f220" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.33.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.34.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.34.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.34.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.34.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "11b7bb64421e2b609603dda777743d5e" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.35.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "9c57c4402235c9921ce927b60e746d6d" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "0ce33e39e28f26108348692d1ba7a781" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.34.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.34.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.34.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.34.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.34.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.34.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.34.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.34.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.34.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.35.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.35.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "8dc0358bc0d1c96cbde609674f41598e" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.35.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.35.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.35.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.35.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.35.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.35.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.35.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.35.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.35.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.35.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "b9278ec2fb4c85ba601ed5d9643a4af5" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.35.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.36.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.36.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "7974f71df45f40bd46a03caf23943951" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f9b023e5b2f1fc18112c37c719f1e74d" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.36.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.36.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.36.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.36.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.36.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.36.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.36.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.36.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.36.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.36.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "63295d72c757350a3437d3511488c825" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "33fa3f530950f965ad61428045f6925e" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32932864, "records": [ { "name": "language_model.model.layers.36.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.36.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 4423680 }, { "name": "language_model.model.layers.37.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11796480 }, { "name": "language_model.model.layers.37.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11796992 }, { "name": "language_model.model.layers.37.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15729152 }, { "name": "language_model.model.layers.37.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 16220672 }, { "name": "language_model.model.layers.37.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 24084992 }, { "name": "language_model.model.layers.37.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25068032 }, { "name": "language_model.model.layers.37.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 25068544 } ], "md5sum": "bf4d38b16cd37349c809affc1c37e99f" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.37.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "2b322774d47f0fb2cd9d0be441a4985f" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.38.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "be3c6ba8cacb204c1577eba688837039" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "2f3c08c171d4e0a8bf53ff77d13ba7cf" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33485824, "records": [ { "name": "language_model.model.layers.37.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 0 }, { "name": "language_model.model.layers.37.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 983040 }, { "name": "language_model.model.layers.37.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 4915200 }, { "name": "language_model.model.layers.37.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 5406720 }, { "name": "language_model.model.layers.37.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 5414400 }, { "name": "language_model.model.layers.37.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 9100800 }, { "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 9108480 }, { "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 9116160 }, { "name": "language_model.model.layers.38.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 9123840 }, { "name": "language_model.model.layers.38.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 9131520 }, { "name": "language_model.model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 12817920 }, { "name": "language_model.model.layers.38.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 20190720 }, { "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 20198400 }, { "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 20206080 }, { "name": "language_model.model.layers.38.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20213760 }, { "name": "language_model.model.layers.38.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 20214272 }, { "name": "language_model.model.layers.38.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 24146432 }, { "name": "language_model.model.layers.38.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24637952 }, { "name": "language_model.model.layers.38.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32502272 }, { "name": "language_model.model.layers.38.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33485312 } ], "md5sum": "94cd392d8f0a6e7f29e02a700aedb47c" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.39.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "fa9194d92ae91ec9296133f2c175cef2" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "1c0406e247b9d91aa192d82634c4347f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.38.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.38.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.38.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.38.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.39.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.39.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.39.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.39.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.39.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.39.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "2c0058444a5eeee03bee08eedbf444dd" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.40.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "fc609c22cbfff42df4cef3d4b300f46b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "494b1389a068dd24844745add0c2f322" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.39.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.39.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.39.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.39.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.39.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.39.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.39.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.40.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.40.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.40.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.40.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "582694d9a75c9bf726fba8e0a6ada0cb" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.41.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "eb5c097417de6a05248b7e7ceb1f0292" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "4f643265edb2c22b7e6526a214c35fe6" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.40.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.40.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.40.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.40.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.40.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.40.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.40.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.40.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.40.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.41.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.41.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "3dff7750db67f5acf9a73e06ff9a6b87" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.41.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.41.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.41.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.41.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.41.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.41.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.41.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.41.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.41.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.41.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "96dc1f5904a935d6a3a3a54fddd71dcc" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.41.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.42.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.42.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "fb6a1795e5c43fb4f80d3f2efdff4243" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "aae10e6905473650608b068cb2333a97" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.42.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.42.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.42.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.42.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.42.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.42.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.42.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.42.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.42.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.42.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "90074a80e3164e77b2d20905c58b623c" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.43.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "b68b07ca2c463c7cd67559e765fc02fe" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ae977f0bfaa86c9700d53b9014a1866b" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.42.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.42.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.43.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.43.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.43.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.43.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.43.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.43.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.43.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.43.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.43.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "008e340c3934109f350410b7a680554d" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.44.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "45e59f455e308248f305406aed27470d" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ac34fce13a3ba3c5d53357a939587948" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.43.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.43.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.43.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.43.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.44.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.44.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.44.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.44.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.44.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.44.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "8c7159a83be38e8788e24df1fcd4b40e" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.45.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "173134f9f140e89d741fb802cdaf4914" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8c23daa52bde599c0b268cbc05f5dc4f" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.44.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.44.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.44.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.44.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.44.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.44.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.44.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.45.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.45.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.45.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.45.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "3d6135fadf8e93cbf00d7b0c7ac224a1" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.46.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "4434cdc5b0681bf260fb201a4c979172" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "a226487fe3bb8b9a1eaf8f6d3ffa6e9d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.45.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.45.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.45.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.45.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.45.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.45.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.45.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.45.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.45.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.46.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.46.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "36d53c6b293c3de8029e85d96e4fb3f1" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.46.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.46.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.46.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.46.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.46.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.46.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.46.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.46.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.46.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.46.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "c70f03c017633f788e4540a4eea252df" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.46.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.47.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.47.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "9dc0dd4918e39d2eb75c441c444842a5" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "5222bd421da77607899d2c42878f6000" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.47.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.47.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.47.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.47.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.47.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.47.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.47.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.47.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.47.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.47.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "1d2da6aa25795c844b68e26bd1473757" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 4431360, "records": [ { "name": "language_model.model.layers.47.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.47.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.norm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "f32-to-bf16", "nbytes": 7680, "byteOffset": 4423680 } ], "md5sum": "a809d14df600b4f32cf3993462e65db6" } ] }