mixtral_8_instruct_7b_en / model.weights.json
prasadsachin's picture
Upload folder using huggingface_hub
7afd15f verified
{
"metadata": {
"total_size": 93405585408.0
},
"weight_map": {
"/layers/reversible_embedding/vars": [
"model_00000.weights.h5"
],
"/layers/mixtral_transformer_decoder/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder/_self_attention_layer/key_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder/_self_attention_layer/output_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder/_self_attention_layer/query_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder/_self_attention_layer/value_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder/_sparse_moe_block/expert_bank/vars": [
"model_00000.weights.h5"
],
"/layers/mixtral_transformer_decoder/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_self_attention_layer/key_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_self_attention_layer/output_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_self_attention_layer/query_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_self_attention_layer/value_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_1/_sparse_moe_block/expert_bank/vars": [
"model_00000.weights.h5"
],
"/layers/mixtral_transformer_decoder_1/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_self_attention_layer/key_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_self_attention_layer/output_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_self_attention_layer/query_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_self_attention_layer/value_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_2/_sparse_moe_block/expert_bank/vars": [
"model_00000.weights.h5"
],
"/layers/mixtral_transformer_decoder_2/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_self_attention_layer/key_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_self_attention_layer/output_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_self_attention_layer/query_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_self_attention_layer/value_dense/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/mixtral_transformer_decoder_3/_sparse_moe_block/expert_bank/vars": [
"model_00000.weights.h5",
"model_00001.weights.h5"
],
"/layers/mixtral_transformer_decoder_3/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_self_attention_layer/key_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_self_attention_layer/output_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_self_attention_layer/query_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_self_attention_layer/value_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_4/_sparse_moe_block/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/mixtral_transformer_decoder_4/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_self_attention_layer/key_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_self_attention_layer/output_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_self_attention_layer/query_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_self_attention_layer/value_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_5/_sparse_moe_block/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/mixtral_transformer_decoder_5/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_self_attention_layer/key_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_self_attention_layer/output_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_self_attention_layer/query_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_self_attention_layer/value_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_6/_sparse_moe_block/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/mixtral_transformer_decoder_6/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_self_attention_layer/key_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_self_attention_layer/output_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_self_attention_layer/query_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_self_attention_layer/value_dense/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/mixtral_transformer_decoder_7/_sparse_moe_block/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/mixtral_transformer_decoder_7/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_self_attention_layer/key_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_self_attention_layer/output_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_self_attention_layer/query_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_self_attention_layer/value_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_8/_sparse_moe_block/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/mixtral_transformer_decoder_8/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_self_attention_layer/key_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_self_attention_layer/output_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_self_attention_layer/query_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_self_attention_layer/value_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_9/_sparse_moe_block/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/mixtral_transformer_decoder_9/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_self_attention_layer/key_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_self_attention_layer/output_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_self_attention_layer/query_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_self_attention_layer/value_dense/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/mixtral_transformer_decoder_10/_sparse_moe_block/expert_bank/vars": [
"model_00002.weights.h5",
"model_00003.weights.h5"
],
"/layers/mixtral_transformer_decoder_10/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_self_attention_layer/key_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_self_attention_layer/output_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_self_attention_layer/query_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_self_attention_layer/value_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_11/_sparse_moe_block/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/mixtral_transformer_decoder_11/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_self_attention_layer/key_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_self_attention_layer/output_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_self_attention_layer/query_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_self_attention_layer/value_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_12/_sparse_moe_block/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/mixtral_transformer_decoder_12/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_self_attention_layer/key_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_self_attention_layer/output_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_self_attention_layer/query_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_self_attention_layer/value_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_13/_sparse_moe_block/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/mixtral_transformer_decoder_13/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_self_attention_layer/key_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_self_attention_layer/output_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_self_attention_layer/query_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_self_attention_layer/value_dense/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/mixtral_transformer_decoder_14/_sparse_moe_block/expert_bank/vars": [
"model_00003.weights.h5",
"model_00004.weights.h5"
],
"/layers/mixtral_transformer_decoder_14/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_self_attention_layer/key_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_self_attention_layer/output_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_self_attention_layer/query_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_self_attention_layer/value_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_15/_sparse_moe_block/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/mixtral_transformer_decoder_15/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_self_attention_layer/key_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_self_attention_layer/output_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_self_attention_layer/query_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_self_attention_layer/value_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_16/_sparse_moe_block/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/mixtral_transformer_decoder_16/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_self_attention_layer/key_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_self_attention_layer/output_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_self_attention_layer/query_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_self_attention_layer/value_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_17/_sparse_moe_block/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/mixtral_transformer_decoder_17/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_self_attention_layer/key_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_self_attention_layer/output_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_self_attention_layer/query_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_self_attention_layer/value_dense/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/mixtral_transformer_decoder_18/_sparse_moe_block/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/mixtral_transformer_decoder_18/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_self_attention_layer/key_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_self_attention_layer/output_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_self_attention_layer/query_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_self_attention_layer/value_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_19/_sparse_moe_block/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/mixtral_transformer_decoder_19/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_self_attention_layer/key_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_self_attention_layer/output_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_self_attention_layer/query_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_self_attention_layer/value_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_20/_sparse_moe_block/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/mixtral_transformer_decoder_20/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_self_attention_layer/key_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_self_attention_layer/output_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_self_attention_layer/query_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_self_attention_layer/value_dense/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/mixtral_transformer_decoder_21/_sparse_moe_block/expert_bank/vars": [
"model_00005.weights.h5",
"model_00006.weights.h5"
],
"/layers/mixtral_transformer_decoder_21/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_self_attention_layer/key_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_self_attention_layer/output_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_self_attention_layer/query_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_self_attention_layer/value_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_22/_sparse_moe_block/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/mixtral_transformer_decoder_22/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_self_attention_layer/key_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_self_attention_layer/output_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_self_attention_layer/query_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_self_attention_layer/value_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_23/_sparse_moe_block/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/mixtral_transformer_decoder_23/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_self_attention_layer/key_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_self_attention_layer/output_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_self_attention_layer/query_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_self_attention_layer/value_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_24/_sparse_moe_block/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/mixtral_transformer_decoder_24/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_self_attention_layer/key_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_self_attention_layer/output_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_self_attention_layer/query_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_self_attention_layer/value_dense/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/mixtral_transformer_decoder_25/_sparse_moe_block/expert_bank/vars": [
"model_00006.weights.h5",
"model_00007.weights.h5"
],
"/layers/mixtral_transformer_decoder_25/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_self_attention_layer/key_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_self_attention_layer/output_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_self_attention_layer/query_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_self_attention_layer/value_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_26/_sparse_moe_block/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/mixtral_transformer_decoder_26/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_self_attention_layer/key_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_self_attention_layer/output_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_self_attention_layer/query_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_self_attention_layer/value_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_27/_sparse_moe_block/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/mixtral_transformer_decoder_27/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_self_attention_layer/key_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_self_attention_layer/output_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_self_attention_layer/query_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_self_attention_layer/value_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_28/_sparse_moe_block/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/mixtral_transformer_decoder_28/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_self_attention_layer/key_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_self_attention_layer/output_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_self_attention_layer/query_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_self_attention_layer/value_dense/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/mixtral_transformer_decoder_29/_sparse_moe_block/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/mixtral_transformer_decoder_29/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_feedforward_layernorm/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_self_attention_layer/key_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_self_attention_layer/output_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_self_attention_layer/query_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_self_attention_layer/value_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_self_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_30/_sparse_moe_block/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/mixtral_transformer_decoder_30/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_feedforward_layernorm/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_self_attention_layer/key_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_self_attention_layer/output_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_self_attention_layer/query_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_self_attention_layer/value_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_self_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/mixtral_transformer_decoder_31/_sparse_moe_block/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/mixtral_transformer_decoder_31/_sparse_moe_block/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/mixtral_layer_normalization/vars": "model_00008.weights.h5"
}
}