diff --git a/mp16/README.md b/mp16/README.md new file mode 100644 index 0000000000000000000000000000000000000000..00fcd6db77381e2da960f9cd73fb60113943f6c9 --- /dev/null +++ b/mp16/README.md @@ -0,0 +1,55 @@ +# Addition from the Hugging Face team + +> [!IMPORTANT] +> +> This repository corresponds to the original Llama format and codebase, not the transformers library. + +The weights were originally distributed in the following format: + +``` +weights/ + consolidated.00.pth + consolidated.01.pth + ... +``` + +Unfortunately, the files themselves were too large for the Hub to handle, so we had to shard them. +To keep the structure that was originally given, each original file is replaced by a folder of the same name that contains its shards: + + +``` +weights/ + consolidated.00/ + consolidated-00001-of-00011.pth + consolidated-00002-of-00011.pth + ... + consolidated.01/ + consolidated-00001-of-00011.pth + consolidated-00002-of-00011.pth + ... + ... +``` + +If you want to run the code that was given with the original weights, we recommend first running this script, which joins the shards in each folder back together into a single `.pth` file: + + +```py +import os +import torch + +from pathlib import Path + +path_to_files = Path('.../weights') + +folders = [folder for folder in os.listdir(path_to_files) if os.path.isdir(path_to_files / folder) and folder.startswith('consolidated')] +for folder in folders: + state_dict = {} + files = [file for file in os.listdir(path_to_files / folder) if file.endswith(".pth")] + + for file in files: + state_dict_partial = torch.load(path_to_files / folder / file, map_location="cpu") + for key, value in state_dict_partial.items(): + state_dict[key]=value + + torch.save(state_dict, path_to_files / f"{folder}.pth") +``` \ No newline at end of file diff --git a/mp16/consolidated.00/consolidated-00001-of-00011.pth b/mp16/consolidated.00/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f63735e0672c77830839b7208916bfad759d4ad --- /dev/null +++ 
b/mp16/consolidated.00/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc86424ff852193d9b933305c12001118ea236c3bd1fe7933eccaa1896668d7 +size 4986210686 diff --git a/mp16/consolidated.00/consolidated-00002-of-00011.pth b/mp16/consolidated.00/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb01fc46ebbe087e5d0e1e6c5e7331433ce5b6b5 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50e908742596c3558e387f78db5c81243160a25b83af0e3e4834dd6a0552493 +size 4983722986 diff --git a/mp16/consolidated.00/consolidated-00003-of-00011.pth b/mp16/consolidated.00/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac6464846dd01f374d37a204bd6b93206add4b34 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9693afc1227a7e78516c8728655b3a3867a6199a008499ed187327e65e4309cd +size 4975267626 diff --git a/mp16/consolidated.00/consolidated-00004-of-00011.pth b/mp16/consolidated.00/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1a565823509ca56358ce56c5dd9451952ce8bbc --- /dev/null +++ b/mp16/consolidated.00/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213547bf7ae6ad09da62fd0acf7b611a931ca9c98f763ae252cdfeb54e2b3e87 +size 4941712784 diff --git a/mp16/consolidated.00/consolidated-00005-of-00011.pth b/mp16/consolidated.00/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..a37841ab42c32d74c46f8ff5914f6bb3d357bbcd --- /dev/null +++ b/mp16/consolidated.00/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d71e7f5887a40c655c3331e0645617362cca7523e4dc60afb3c821ce5dadcab6 +size 4983722986 diff --git a/mp16/consolidated.00/consolidated-00006-of-00011.pth b/mp16/consolidated.00/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd613572296176a2d50169e583379a1a3583dc35 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8cad5382ca0212e922bd9da1da023e553eda6da57662d3ea95b6f7e1947a7b +size 4975267626 diff --git a/mp16/consolidated.00/consolidated-00007-of-00011.pth b/mp16/consolidated.00/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..624ca4eb918fbe518e60a11c0a6e6bdd6b149690 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79730420e1e71634b6c69d757ef2b4ec99665c2f2a867fbacdf3c54a9b42165 +size 4941712784 diff --git a/mp16/consolidated.00/consolidated-00008-of-00011.pth b/mp16/consolidated.00/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7f24968fa637bfb33c27fefc69d380d8346b539 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc89cc655acb27cb2a993771b385f22ad7dc3c328778e913cd8d541769825b4 +size 4983722986 diff --git a/mp16/consolidated.00/consolidated-00009-of-00011.pth b/mp16/consolidated.00/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f03f4c8108b47e55a44dab867dd919847c733c9 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef33e3e1be26a44660b4861b8369c22222531baf22a190ae2973dfe975a1c61 +size 4975267754 diff --git a/mp16/consolidated.00/consolidated-00010-of-00011.pth 
b/mp16/consolidated.00/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..affaa13b3e7310c5dd03602fff205aa0741416fa --- /dev/null +++ b/mp16/consolidated.00/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b321e0963e92ac17fded54bcef1bd11fc799254e5844373552c7c3a341eba680 +size 4941712912 diff --git a/mp16/consolidated.00/consolidated-00011-of-00011.pth b/mp16/consolidated.00/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..795ae859c63251d7ca2f319a9c92ab865e72d6f9 --- /dev/null +++ b/mp16/consolidated.00/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89160b080ec21fce1f0857723c6c7f7f3b7058f219c65bb2252e91350f31b6cb +size 1579985478 diff --git a/mp16/consolidated.00/consolidated.pth.index.json b/mp16/consolidated.00/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.00/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + 
"layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": 
"consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + 
"layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": 
"consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": 
"consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": 
"consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": 
"consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": 
"consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": 
"consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": 
"consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": 
"consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": 
"consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": 
"consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": 
"consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": 
"consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": 
"consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": 
"consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": 
"consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": 
"consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": 
"consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": 
"consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": 
"consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": 
"consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": 
"consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": 
"consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": 
"consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": 
"consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": 
"consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": 
"consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + 
"layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": 
"consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + 
"layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": 
"consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.01/consolidated-00001-of-00011.pth b/mp16/consolidated.01/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e9faaca643669db2ed6740bde64c0683679ff7e --- /dev/null +++ b/mp16/consolidated.01/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c979c6b2ed50fd99986db093890b4f43a326beaa6d2e5e8d917357537d6d2320 +size 4986210686 diff --git a/mp16/consolidated.01/consolidated-00002-of-00011.pth b/mp16/consolidated.01/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed4546777b671e065116a23eb001b07c0007565c --- /dev/null +++ b/mp16/consolidated.01/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee94fa7e1722978c86b8da39f0f993e69292d3f6dfdfa7e5a8d933a1ea8c89f +size 4983722986 diff --git a/mp16/consolidated.01/consolidated-00003-of-00011.pth b/mp16/consolidated.01/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..42f8e422ffb79b02f22212e8f2055df575598af7 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f219fb7ac813e08bfaaea14a25fde20d2f610547af879e5b27de4a940999df8 +size 4975267626 diff --git 
a/mp16/consolidated.01/consolidated-00004-of-00011.pth b/mp16/consolidated.01/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d77c1187ac20193c63d957ee9bc2d32efe27b44e --- /dev/null +++ b/mp16/consolidated.01/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94227aa8ab9e489345beacff5da91abbb05d35a37d027dd0ed319612fe4c25ca +size 4941712784 diff --git a/mp16/consolidated.01/consolidated-00005-of-00011.pth b/mp16/consolidated.01/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e0a2c7fed0192fdecc6e4577515f26d07e9e770 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0774f9126b5cc23becbc6a748bb18955cdd94b68c59bf462fede38d3bf33a270 +size 4983722986 diff --git a/mp16/consolidated.01/consolidated-00006-of-00011.pth b/mp16/consolidated.01/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..a827ae576932cf9d36439f834371d9efeb78bb63 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6c5e5707eacc559dfa74ad9d7ea5f11bb5104cf0e61b1d2f2ad66add7b5800 +size 4975267626 diff --git a/mp16/consolidated.01/consolidated-00007-of-00011.pth b/mp16/consolidated.01/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..acd58148d569ed557742de39d89fb876d2ea8827 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e35370e55465a5c1a14992e1b098763c6718fc4ddaae886fde3fb970b4a215 +size 4941712784 diff --git a/mp16/consolidated.01/consolidated-00008-of-00011.pth b/mp16/consolidated.01/consolidated-00008-of-00011.pth new file mode 100644 index 
0000000000000000000000000000000000000000..3de654c5be0ddc2daa99cbffb9846ed871ee68f4 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e42fea070ee165b46f5f27240b92f515ca06529ae941ea2a3fe74bb8543f4284 +size 4983722986 diff --git a/mp16/consolidated.01/consolidated-00009-of-00011.pth b/mp16/consolidated.01/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d34ffd56f06a4f5b5a87dea091e0053bc70dee57 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e20a2fd5b4dc7dfb7a8d6df9f30604ea600264d40f25cd23e974cd991b0d44 +size 4975267754 diff --git a/mp16/consolidated.01/consolidated-00010-of-00011.pth b/mp16/consolidated.01/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b1908bb035920947715e6a47cf221327392fb19 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e951e87236b2301e161d6689c42f0e1e027899217163edd254056ce998d7f78 +size 4941712912 diff --git a/mp16/consolidated.01/consolidated-00011-of-00011.pth b/mp16/consolidated.01/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..418e2305d585f3e4021add8e6d1d846f0d2b53b7 --- /dev/null +++ b/mp16/consolidated.01/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79538c5485038b3440dd5201fe5e4a4762772876a2009856c3d13e99ec760922 +size 1579985478 diff --git a/mp16/consolidated.01/consolidated.pth.index.json b/mp16/consolidated.01/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.01/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + 
"metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": 
"consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + 
"layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": 
"consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": 
"consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": 
"consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": 
"consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": 
"consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": 
"consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": 
"consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": 
"consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": 
"consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": 
"consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": 
"consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": 
"consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": 
"consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": 
"consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": 
"consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": 
"consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": 
"consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": 
"consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": 
"consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": 
"consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": 
"consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": 
"consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": 
"consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": 
"consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + 
"layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": 
"consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + 
"layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": 
"consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.02/consolidated-00001-of-00011.pth b/mp16/consolidated.02/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..032e6518509d52e0a71b694044b2a5f2a760c67c --- /dev/null +++ b/mp16/consolidated.02/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c778d7d220350d9bf887c822c0bffe08f67d8a152120e00efe43f1e552e43a0b +size 4986210686 diff --git a/mp16/consolidated.02/consolidated-00002-of-00011.pth 
b/mp16/consolidated.02/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1c4168ca563551a2927ce9a853dcee6dad787ab --- /dev/null +++ b/mp16/consolidated.02/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b67c5009d2e8ea683306f61b9db2ba8a83407255b7334e969fa255158dc3464 +size 4983722986 diff --git a/mp16/consolidated.02/consolidated-00003-of-00011.pth b/mp16/consolidated.02/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd29c01ae7d0483ae72b2e9280cc031fe1ec81c2 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a01e3d760349e202d35994f17df747ddd9e7624e8b0e5e4542378b5a07cc9010 +size 4975267626 diff --git a/mp16/consolidated.02/consolidated-00004-of-00011.pth b/mp16/consolidated.02/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..16bf5801cc845cbe18db59f8dfdfd53744b20045 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddeee00893946f24826fc1274fea985f2ebb25b84bfbabc427b0e0b20025fef +size 4941712784 diff --git a/mp16/consolidated.02/consolidated-00005-of-00011.pth b/mp16/consolidated.02/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c2890d4b54c1bd35cabbc4563d15604579a0906 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40409b5abb2b192901687c234742cfdf0cd836503d7c0f00a382a3a4c0570b7 +size 4983722986 diff --git a/mp16/consolidated.02/consolidated-00006-of-00011.pth b/mp16/consolidated.02/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef4a6c9b0af3a4e31cf8359ddf702312b1e44d10 
--- /dev/null +++ b/mp16/consolidated.02/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c843d5cae0f6e1a10130d0a440ee33b6d291902a477cccfe075a4a069fba81 +size 4975267626 diff --git a/mp16/consolidated.02/consolidated-00007-of-00011.pth b/mp16/consolidated.02/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b438a49dd16f5179795388bf32fb0324293c6501 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00b5cc6f41e17e12df23de747f35bc0adfede744b946526cc3f6cf1929f5e4f +size 4941712784 diff --git a/mp16/consolidated.02/consolidated-00008-of-00011.pth b/mp16/consolidated.02/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef9002ef86e8eb158a1af0e934588e9d014eb2b2 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c413bab9bf2e44178f6ebc0c75b6d9ac564f317f62597d242a243c108f007d6b +size 4983722986 diff --git a/mp16/consolidated.02/consolidated-00009-of-00011.pth b/mp16/consolidated.02/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7776e8f76126b9744003f1d4df0abcf79bb533f0 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55c66e3429d3b1d19967743120afc3364b3884190f675b6aa2c37410280ef34 +size 4975267754 diff --git a/mp16/consolidated.02/consolidated-00010-of-00011.pth b/mp16/consolidated.02/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d2c9f4c2f0ef4a33245f51a6b8ba717a4101106 --- /dev/null +++ b/mp16/consolidated.02/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4b27d6d52bae8abcd5fd1c21fb0a31bf6d123900c64934b00846314c43b7c092 +size 4941712912 diff --git a/mp16/consolidated.02/consolidated-00011-of-00011.pth b/mp16/consolidated.02/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b0f95b11c5ad57c936f062d13e89049e413dde3c --- /dev/null +++ b/mp16/consolidated.02/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae9b9e5f5a2eae613a7e27450fb6da353590f879407754833389346abf8d225 +size 1579985478 diff --git a/mp16/consolidated.02/consolidated.pth.index.json b/mp16/consolidated.02/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.02/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + 
"layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": 
"consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": 
"consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": 
"consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": 
"consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": 
"consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": 
"consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": 
"consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": 
"consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": 
"consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": 
"consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": 
"consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": 
"consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": 
"consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": 
"consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": 
"consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": 
"consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": 
"consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": 
"consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": 
"consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": 
"consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": 
"consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": 
"consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": 
"consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": 
"consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + 
"layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": 
"consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + 
"layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": 
"consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + 
"layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": 
"consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + 
"layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": 
"consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.03/consolidated-00001-of-00011.pth b/mp16/consolidated.03/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0e0e6f8b6b4ca741609cf991de00712beb9f6a5 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae438de9028a8a5c419e8a3e40d370d23d709290ecdb4ec4d73c39bc3faecb60 +size 4986210686 diff --git a/mp16/consolidated.03/consolidated-00002-of-00011.pth b/mp16/consolidated.03/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..c72c88c8a516de744c2ed04bfea9c7479e91be29 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb0e2a285add863ba639be41bead6876311079479a9d7e7d012463e71536625 +size 4983722986 diff --git a/mp16/consolidated.03/consolidated-00003-of-00011.pth b/mp16/consolidated.03/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..197207981ae5ca41a8862faaf04d2ac1c1cc221f --- /dev/null +++ b/mp16/consolidated.03/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18522c03f2a9248f7f9b4ec60bfc053d390cdec6265afffe8bc4144635e3d8e +size 4975267626 diff --git a/mp16/consolidated.03/consolidated-00004-of-00011.pth b/mp16/consolidated.03/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b1a666c6f38b0ff643f2fb3c33362b446c93d6a2 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00004-of-00011.pth @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c08486400a6aa0cc0354323833d0bc5fb7875ade583a13dddb5684203fd6fe5c +size 4941712784 diff --git a/mp16/consolidated.03/consolidated-00005-of-00011.pth b/mp16/consolidated.03/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d5f5d0fd9167f51e06963ae8c10e1a5d84b017a --- /dev/null +++ b/mp16/consolidated.03/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a287c02e9087514e4349e14db859eaff856e22d045a13fce85097f04af414097 +size 4983722986 diff --git a/mp16/consolidated.03/consolidated-00006-of-00011.pth b/mp16/consolidated.03/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e0a4f1f221d26ba2cd2f883e9af97a650104e06 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e8b246537ed8b869fc1950fe116f2de3a84bedbd0392e68df4f8ebee50528f +size 4975267626 diff --git a/mp16/consolidated.03/consolidated-00007-of-00011.pth b/mp16/consolidated.03/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1e65e5e2aae9efb0dc9754330b1c1a8afebc1b9 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2114769f0546542e3dc6aaa5998d8d2c4889070294ab145649b64e716581e7ab +size 4941712784 diff --git a/mp16/consolidated.03/consolidated-00008-of-00011.pth b/mp16/consolidated.03/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..783f13eb90c1db66ab1136f8f53b7ad7c6db02da --- /dev/null +++ b/mp16/consolidated.03/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf17d469f1feaadf1a431dd80865e48f39b6e23d8fcd0caa2e332180bb2d13f9 +size 4983722986 diff --git 
a/mp16/consolidated.03/consolidated-00009-of-00011.pth b/mp16/consolidated.03/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..965744afc6f0a7961eeb99c04e7fc82e4639ab52 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e6a0ce1f7010f376a6f530231abd0d91d86874d109619b2906f94070a60a190 +size 4975267754 diff --git a/mp16/consolidated.03/consolidated-00010-of-00011.pth b/mp16/consolidated.03/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..2da3bd77ea7d63b61e57f60d83123a69c7e961f5 --- /dev/null +++ b/mp16/consolidated.03/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5392bfb4b03a5b1a73f6ca6b6148179bd9c9251a9c6e50cd95eac6672945fed +size 4941712912 diff --git a/mp16/consolidated.03/consolidated-00011-of-00011.pth b/mp16/consolidated.03/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec73d8abe7072c0ed6cfb4f2052262a4e874907d --- /dev/null +++ b/mp16/consolidated.03/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2aadb1c26b4d5d4970007d6d3770cd7925b2695057ded80345cf5c347e24bb7 +size 1579985478 diff --git a/mp16/consolidated.03/consolidated.pth.index.json b/mp16/consolidated.03/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.03/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": 
"consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + 
"layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": 
"consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + 
"layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + 
"layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + 
"layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + 
"layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + 
"layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + 
"layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + 
"layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + 
"layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + 
"layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + 
"layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + 
"layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + 
"layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + 
"layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + 
"layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + 
"layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + 
"layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + 
"layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + 
"layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + 
"layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + 
"layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + 
"layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + 
"layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + 
"layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + 
"layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + 
"layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + 
"layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + 
"layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": 
"consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + 
"layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": 
"consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + 
"layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": 
"consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + 
"layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": 
"consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + 
"layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.04/consolidated-00001-of-00011.pth b/mp16/consolidated.04/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..c0caf4f008ac4dd325cb837a9c4f4d50f49befbb --- /dev/null +++ b/mp16/consolidated.04/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57aed1871dac2ea5c7118fe5c3db8ce275b383570280e3535794e0b4e403441 +size 4986210686 diff --git a/mp16/consolidated.04/consolidated-00002-of-00011.pth b/mp16/consolidated.04/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..daa99e41069c87f0595b45dcef40d8de6347a0d3 --- /dev/null +++ b/mp16/consolidated.04/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e3a334c17f5983019b307b897116f247cc42bc2c8787f59c0344e1da634aa8d6 +size 4983722986 diff --git a/mp16/consolidated.04/consolidated-00003-of-00011.pth b/mp16/consolidated.04/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..49587fb310ff4744d625eb5b8e00e6848198f595 --- /dev/null +++ b/mp16/consolidated.04/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a539d75cb331d55af7b8bafe56a84b0de96420ae488ec561905a57177c26dd +size 4975267626 diff --git a/mp16/consolidated.04/consolidated-00004-of-00011.pth b/mp16/consolidated.04/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2e74c17b93740907bf5765b3854fbf509a29beb --- /dev/null +++ b/mp16/consolidated.04/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:201cbd0f37b710a9fe262b511349722eb51419837acc63a262af805372d774d3 +size 4941712784 diff --git a/mp16/consolidated.04/consolidated-00005-of-00011.pth b/mp16/consolidated.04/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..747f5e74c0806dcca22769afe1c15ffc68fcf48f --- /dev/null +++ b/mp16/consolidated.04/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd90f5ca7ef9f97f1a5a4d72da74d81d321546172e50fcd6f1908156f567e55 +size 4983722986 diff --git a/mp16/consolidated.04/consolidated-00006-of-00011.pth b/mp16/consolidated.04/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fb198e8df875f5cc6688a5ecdf19eb3162953bb --- /dev/null +++ b/mp16/consolidated.04/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac494ca19e65243000dc743e624e0259f73f0d98d5a9b719262874fec3e12d06 +size 4975267626 diff --git a/mp16/consolidated.04/consolidated-00007-of-00011.pth 
b/mp16/consolidated.04/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..af6e094eeead1a4b4ee4c64adbfe01febf23cd20 --- /dev/null +++ b/mp16/consolidated.04/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bcc508fb15d6449a084fb6ed2fef94c717236524173f7dfab874b048e2c9fab +size 4941712784 diff --git a/mp16/consolidated.04/consolidated-00008-of-00011.pth b/mp16/consolidated.04/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e8be32d28aba8f3d3b1946270584588d4e9be74 --- /dev/null +++ b/mp16/consolidated.04/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4381c317a423ce38c23412423aea855969f47f3fe813dbc385b83e5ff987de +size 4983722986 diff --git a/mp16/consolidated.04/consolidated-00009-of-00011.pth b/mp16/consolidated.04/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..46013da3b6c86a18968a2dde92d9700031490ed0 --- /dev/null +++ b/mp16/consolidated.04/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561876fcff4eaddf91ef290999c249ee3f0fab54bec1da68e5e7a75edd8efc20 +size 4975267754 diff --git a/mp16/consolidated.04/consolidated-00010-of-00011.pth b/mp16/consolidated.04/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..3769eec47311998ae530e8e0e73db8325a23f37e --- /dev/null +++ b/mp16/consolidated.04/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa89ef5fe74bc51ff64bd3bf3b9ba99a35e08e52c7006f58dce6285dff2810aa +size 4941712912 diff --git a/mp16/consolidated.04/consolidated-00011-of-00011.pth b/mp16/consolidated.04/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..53d9f8239b7701fe269419075c3f0bfd74c2df83 
--- /dev/null +++ b/mp16/consolidated.04/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dba76637b353a6fd50f64804311173da517e98be004209f6debd40d3d4378fb +size 1579985478 diff --git a/mp16/consolidated.04/consolidated.pth.index.json b/mp16/consolidated.04/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.04/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + 
"layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": 
"consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + 
"layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + 
"layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + 
"layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + 
"layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + 
"layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + 
"layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + 
"layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + 
"layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + 
"layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + 
"layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + 
"layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + 
"layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + 
"layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + 
"layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + 
"layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + 
"layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + 
"layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + 
"layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + 
"layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + 
"layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + 
"layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + 
"layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + 
"layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + 
"layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + 
"layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + 
"layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": 
"consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": 
"consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + 
"layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": 
"consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git 
a/mp16/consolidated.05/consolidated-00001-of-00011.pth b/mp16/consolidated.05/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f3ba623ad1f7555aa0b45edc6a280b3cc2f44b37 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b03c662da7390bf62ec3c00977a7db7d57ef028f0c528eb1b5ee08b7e7a8d905 +size 4986210686 diff --git a/mp16/consolidated.05/consolidated-00002-of-00011.pth b/mp16/consolidated.05/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6597b57732967566eef0c82d92b286bf188bab96 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7774a2988d29cd4c021910ac6d69aaf48c989fc767352417d1b6a0946de2f24b +size 4983722986 diff --git a/mp16/consolidated.05/consolidated-00003-of-00011.pth b/mp16/consolidated.05/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..45dfbc6f6b9201ac5da627e84d60dcbb4b33cbb9 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36b87237ad62e85966d981ac5a11ab8a9bb4874cadbc726466da423c3fcb898 +size 4975267626 diff --git a/mp16/consolidated.05/consolidated-00004-of-00011.pth b/mp16/consolidated.05/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d0276eab1c32c598b3c7f9bcc53b6d7076b8a38 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7264711499a547fbe4e09cdf52024c745714f3dee987897d9a4f6dd0ccf62c42 +size 4941712784 diff --git a/mp16/consolidated.05/consolidated-00005-of-00011.pth b/mp16/consolidated.05/consolidated-00005-of-00011.pth new file mode 100644 index 
0000000000000000000000000000000000000000..e88cb5dd85e87fc03462d939858dd5e16e93e76f --- /dev/null +++ b/mp16/consolidated.05/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce961803da60294479b5938385cc9e96b12da1af69986cbc506e2c9482d330dc +size 4983722986 diff --git a/mp16/consolidated.05/consolidated-00006-of-00011.pth b/mp16/consolidated.05/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..59b74040e11e7980e74c2dc97822bac6f066b755 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f170c3a24b447a9b1d11b01448c353e72fe8d8a4781f72692e6a88ddef6d46f +size 4975267626 diff --git a/mp16/consolidated.05/consolidated-00007-of-00011.pth b/mp16/consolidated.05/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d321734ff0864e1c23fc59afee1636e16c78344 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a012a66e2e13b744417998786c9b8f9656d9b0c7c2eb8f237d2aab8dfbae49bf +size 4941712784 diff --git a/mp16/consolidated.05/consolidated-00008-of-00011.pth b/mp16/consolidated.05/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac6815f26252f24b15095141cca109cfea81d4c1 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4890bf5f72574569d2761616d7a1e6d521c24fdf7e2a88d3b0dd9e5aeb163af3 +size 4983722986 diff --git a/mp16/consolidated.05/consolidated-00009-of-00011.pth b/mp16/consolidated.05/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..53de2c027c64cb874c8ed31bd145593707bfe6aa --- /dev/null +++ b/mp16/consolidated.05/consolidated-00009-of-00011.pth @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5e4a0181f6b7451548004bb471dae056fcc349e967395a5526665bfadf5702 +size 4975267754 diff --git a/mp16/consolidated.05/consolidated-00010-of-00011.pth b/mp16/consolidated.05/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2efcf99070602d1d99c0cdf621d32bbdcfd4308 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd983f1b284b4b42bb487b10f440914c1b5615b870ea33e0e16e12eab2a0eb2 +size 4941712912 diff --git a/mp16/consolidated.05/consolidated-00011-of-00011.pth b/mp16/consolidated.05/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..49d09c493ea2d4180cddb152f58fb8f008a418b9 --- /dev/null +++ b/mp16/consolidated.05/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287d299ddacbf17b9ed852d3f5655f49f395da21a395b09eac43f5d33d91d30e +size 1579985478 diff --git a/mp16/consolidated.05/consolidated.pth.index.json b/mp16/consolidated.05/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.05/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + 
"layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": 
"consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + 
"layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": 
"consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": 
"consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": 
"consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": 
"consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": 
"consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": 
"consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": 
"consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": 
"consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": 
"consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": 
"consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": 
"consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": 
"consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": 
"consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": 
"consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": 
"consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": 
"consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": 
"consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": 
"consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": 
"consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": 
"consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": 
"consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": 
"consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": 
"consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": 
"consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + 
"layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": 
"consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + 
"layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": 
"consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + 
"layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": 
"consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.06/consolidated-00001-of-00011.pth b/mp16/consolidated.06/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..822232257686e160e0fb869773e3516d57daf47b --- /dev/null +++ b/mp16/consolidated.06/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7669dd5d195eb894eb994c604006cb599ba427ae155c20f376dced1d7746cced +size 4986210686 diff --git a/mp16/consolidated.06/consolidated-00002-of-00011.pth b/mp16/consolidated.06/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9373e46ee5034da4c4f514a6984a1c4ca2bfef8c --- /dev/null +++ b/mp16/consolidated.06/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9fb90f641bda2cdf692e1d105bddbe500b9c44a1673786bad6f8b9d6b2899f1 +size 4983722986 diff --git a/mp16/consolidated.06/consolidated-00003-of-00011.pth b/mp16/consolidated.06/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..48b17cb215d7158642ea0de8d0f1716ae17f535a --- /dev/null +++ 
b/mp16/consolidated.06/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed80259064c0c6dece5577b6e0959f385331eb731ea5b34e6b017c8253bb05b8 +size 4975267626 diff --git a/mp16/consolidated.06/consolidated-00004-of-00011.pth b/mp16/consolidated.06/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c46209b1bcf55edf13d092eff758149ede38db7 --- /dev/null +++ b/mp16/consolidated.06/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6976ca9b062b64a5550c2878cdd017e6f6f55e4e13131d3ed9eccd7fc9244eb7 +size 4941712784 diff --git a/mp16/consolidated.06/consolidated-00005-of-00011.pth b/mp16/consolidated.06/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f125a9636256e41b6330ff5604f4956e383be43a --- /dev/null +++ b/mp16/consolidated.06/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f0f10420a99ce8914b75be0215710424784741b6f5405c3e931e83399b452c +size 4983722986 diff --git a/mp16/consolidated.06/consolidated-00006-of-00011.pth b/mp16/consolidated.06/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..04dce6f8ed5984abfea7e6779fb31490a5d366cd --- /dev/null +++ b/mp16/consolidated.06/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b6b7a966e509905e0cbc31ac6b66da7e001d2b59c1dc1bb161e43ea83a24ce +size 4975267626 diff --git a/mp16/consolidated.06/consolidated-00007-of-00011.pth b/mp16/consolidated.06/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..99cc2b01d9e455ee52f72ad33654d0a4cd394b30 --- /dev/null +++ b/mp16/consolidated.06/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dea461f640eb1527ef11eb09075f1e06cffcba5bf30facfa79da780274374617 +size 4941712784 diff --git a/mp16/consolidated.06/consolidated-00008-of-00011.pth b/mp16/consolidated.06/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd3c266b14f19b005ff0cdb85e53a61becb657a9 --- /dev/null +++ b/mp16/consolidated.06/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9a730616e14674a9fb1cda4d90ae4b65011c2e6408ecc2e7eb1fdb6e3480fc +size 4983722986 diff --git a/mp16/consolidated.06/consolidated-00009-of-00011.pth b/mp16/consolidated.06/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..aea511a87964f450d776f177d14cf85f8131d514 --- /dev/null +++ b/mp16/consolidated.06/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07d6fef84f342f95351b0be378b32ec36612b69c639189aa97e883fd8c6b837 +size 4975267754 diff --git a/mp16/consolidated.06/consolidated-00010-of-00011.pth b/mp16/consolidated.06/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..1753f9b1417e84c7377b49d32df00ca74535f9a4 --- /dev/null +++ b/mp16/consolidated.06/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d50931dd483752b87d88d4dddef031ba46a005bfa0b9e4310244604b72d830 +size 4941712912 diff --git a/mp16/consolidated.06/consolidated-00011-of-00011.pth b/mp16/consolidated.06/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b561b0c913ddd93bca624247d467a51db97c5324 --- /dev/null +++ b/mp16/consolidated.06/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7da4f9e0fc911fbb1c88b640c03bf11b771dd4c0a9b437454465c9d4195bf5e +size 1579985478 diff --git a/mp16/consolidated.06/consolidated.pth.index.json 
b/mp16/consolidated.06/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.06/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + 
"layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": 
"consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": 
"consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": 
"consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": 
"consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": 
"consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": 
"consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": 
"consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": 
"consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": 
"consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": 
"consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": 
"consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": 
"consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": 
"consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": 
"consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": 
"consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": 
"consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": 
"consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": 
"consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": 
"consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": 
"consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": 
"consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": 
"consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": 
"consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + 
"layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": 
"consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + 
"layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": 
"consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + 
"layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": 
"consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.07/consolidated-00001-of-00011.pth b/mp16/consolidated.07/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a79ee5bf2cff8e53b964c36d051311e1e11e71b --- /dev/null +++ b/mp16/consolidated.07/consolidated-00001-of-00011.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81bddf5c015742889e0b13868610f428995161602f370032f28de17e9eec6821 +size 4986210686 diff --git a/mp16/consolidated.07/consolidated-00002-of-00011.pth b/mp16/consolidated.07/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1d46856ef7e60c09ae961ceca10fb359c8f711e --- /dev/null +++ b/mp16/consolidated.07/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675daed556e553aafdbe09340aa2b12b433e409210cf8be70afde0081e7d350c +size 4983722986 diff --git a/mp16/consolidated.07/consolidated-00003-of-00011.pth b/mp16/consolidated.07/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a2d2b8fe130531950a99e8db8f422b5e3d0c656 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d580a578ad9aa29ed7492fa38ba2d6af8d0b9536ca6d4a6fa4176b24b6ac3d +size 4975267626 diff --git a/mp16/consolidated.07/consolidated-00004-of-00011.pth b/mp16/consolidated.07/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..174c2faf14e267acea39e07497569a63fe2cd271 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47a0bc9d3c7d6d57ffff909e27b5a736cb13ecee0df8ba2391b5904cd74ad77e +size 4941712784 diff --git a/mp16/consolidated.07/consolidated-00005-of-00011.pth b/mp16/consolidated.07/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..39196fb40944e116d5b37fd13ceee13375439a02 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c8866f78e47b5458b0a5b2ce609842a2b137f64cc4607b7e704da78dd37921 +size 4983722986 diff --git 
a/mp16/consolidated.07/consolidated-00006-of-00011.pth b/mp16/consolidated.07/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..83597d0ed978abae94e53ec3e27982abe33554e7 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aae06391260eebda7e90eadab6978b79fc4a2595ccd95d4550463645da1a8bc +size 4975267626 diff --git a/mp16/consolidated.07/consolidated-00007-of-00011.pth b/mp16/consolidated.07/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..06b7d4663b9409dc6827d041c020b44da26c07b7 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e3f3bca3508acfa075e7f0bbf5b27380ddd8fb43fe99a2f117bef76babc1faa +size 4941712784 diff --git a/mp16/consolidated.07/consolidated-00008-of-00011.pth b/mp16/consolidated.07/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e68cb049fa5388db9a317ca2421a38eb9452e67 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb223e632c94e782e9108f977bc2b51228fcb19667940f3719fb5610331e3fbc +size 4983722986 diff --git a/mp16/consolidated.07/consolidated-00009-of-00011.pth b/mp16/consolidated.07/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..87f4f42ba2eb64fe9d427f3ec5ea6738cdfab80a --- /dev/null +++ b/mp16/consolidated.07/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92d1da90b454138bb955d4612bab9e504f652f135c727f738b525442c6bdeb7 +size 4975267754 diff --git a/mp16/consolidated.07/consolidated-00010-of-00011.pth b/mp16/consolidated.07/consolidated-00010-of-00011.pth new file mode 100644 index 
0000000000000000000000000000000000000000..0955e68dd37019a721be57f935d865c1d34b17ee --- /dev/null +++ b/mp16/consolidated.07/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa694a52093dc00fc2cd3f3db1a792c36841380e477afdd564b313030659b3b +size 4941712912 diff --git a/mp16/consolidated.07/consolidated-00011-of-00011.pth b/mp16/consolidated.07/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d5eca28d9cec608cf7f77bc481ac50fe723103c8 --- /dev/null +++ b/mp16/consolidated.07/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed1cbb99dfbf37d6e3a7f481a431e61ffa730896d0d6e46ab029c62f5e4a629 +size 1579985478 diff --git a/mp16/consolidated.07/consolidated.pth.index.json b/mp16/consolidated.07/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.07/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + 
"layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": 
"consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": 
"consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": 
"consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": 
"consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": 
"consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": 
"consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": 
"consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": 
"consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": 
"consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": 
"consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": 
"consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": 
"consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": 
"consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": 
"consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": 
"consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": 
"consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": 
"consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": 
"consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": 
"consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": 
"consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": 
"consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": 
"consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": 
"consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": 
"consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + 
"layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": 
"consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + 
"layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": 
"consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + 
"layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": 
"consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.08/consolidated-00001-of-00011.pth b/mp16/consolidated.08/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..351cccb1ff1897f8bea6581438b02d8406ffd797 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c1d3828363bc658d86ec64965ab16ae264a72103202f29e3bac29952e82805 +size 4986210686 diff --git a/mp16/consolidated.08/consolidated-00002-of-00011.pth b/mp16/consolidated.08/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..aa4660ec3f06910abea65be6e10f561b2fe3a702 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfd3fca7e01176756680d73aa35ed8104661d81616d55e2d1b21c26b4ba3f50 +size 4983722986 diff --git a/mp16/consolidated.08/consolidated-00003-of-00011.pth b/mp16/consolidated.08/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f485b681474ad43ec114bd279fbd4531c8a9e75 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c159528aaf3e95fced991ad6b4b66c71b9a7dad32bd355fdab3ba4cd892f629 +size 4975267626 diff --git a/mp16/consolidated.08/consolidated-00004-of-00011.pth 
b/mp16/consolidated.08/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b0a7c13d9fcbd827d4a33da23cff071155828021 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ca512c12ea75d8db061b2709c7adb4f41736727c8eb9b59f08b0937e137aef +size 4941712784 diff --git a/mp16/consolidated.08/consolidated-00005-of-00011.pth b/mp16/consolidated.08/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..58e41b4542e3df89abf7dba7e18f856cf3f2c339 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968b12a958581118e8edd0d0c55d0c1218642883a23fe30612a3999768091b8b +size 4983722986 diff --git a/mp16/consolidated.08/consolidated-00006-of-00011.pth b/mp16/consolidated.08/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff679232a1e99979e12a2ea9603c12f41d1f7d26 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9e50aad455ff3bddfcdb878693d01a1c93e1af9b74a0b176d15ccca2c105a1 +size 4975267626 diff --git a/mp16/consolidated.08/consolidated-00007-of-00011.pth b/mp16/consolidated.08/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6f5b89e66793b82135af08b1c0b15eef0f0fb07 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b539afabba07e167a1d3a18e3fb153c57133a55b794390cca1b763a8d7ee56 +size 4941712784 diff --git a/mp16/consolidated.08/consolidated-00008-of-00011.pth b/mp16/consolidated.08/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..8844c8c83923c5d430d3ca0f6c134c01c2a940d6 
--- /dev/null +++ b/mp16/consolidated.08/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6005e68f61c4a7be138a5a3307f8aa721bc2dae439c675d36f4336751964b1a8 +size 4983722986 diff --git a/mp16/consolidated.08/consolidated-00009-of-00011.pth b/mp16/consolidated.08/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..40299ee87a451ddc48a6e601568bdd055ca6adf4 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5975a33211bdfab3df7a09504d55989f05c5b2a0b7821b5b9a1cfaa39d9b99a7 +size 4975267754 diff --git a/mp16/consolidated.08/consolidated-00010-of-00011.pth b/mp16/consolidated.08/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b3fc5bb11e3b9fd607946223642b1e971bc3dac0 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d55c1096d7a1ec4f1f89a7f98ff4ec26e0148e6e7a9237bb09a73d206741e2 +size 4941712912 diff --git a/mp16/consolidated.08/consolidated-00011-of-00011.pth b/mp16/consolidated.08/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..11b6bfa9565203a0d7cc4a4c360322a1cd599989 --- /dev/null +++ b/mp16/consolidated.08/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab27f831b12967cb019a38bcafba596807a3d29b34642450a3914c94b2f3011 +size 1579985478 diff --git a/mp16/consolidated.08/consolidated.pth.index.json b/mp16/consolidated.08/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.08/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + 
"tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": 
"consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + 
"layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": 
"consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": 
"consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": 
"consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": 
"consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": 
"consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": 
"consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": 
"consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": 
"consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": 
"consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": 
"consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": 
"consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": 
"consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": 
"consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": 
"consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": 
"consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": 
"consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": 
"consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": 
"consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": 
"consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": 
"consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": 
"consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": 
"consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": 
"consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": 
"consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + 
"layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": 
"consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + 
"layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": 
"consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.09/consolidated-00001-of-00011.pth b/mp16/consolidated.09/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a349c1d01b80452a9bd79ae6a8c2a6d6168df03 --- /dev/null +++ b/mp16/consolidated.09/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61fe946d22644b23eacf15a31c49bb0e7b1b886acdab85ce5664284f6cb66f8 +size 4986210686 diff --git a/mp16/consolidated.09/consolidated-00002-of-00011.pth b/mp16/consolidated.09/consolidated-00002-of-00011.pth new file mode 100644 
index 0000000000000000000000000000000000000000..4d069e683169d3c6d27beb715d63bc8936200953 --- /dev/null +++ b/mp16/consolidated.09/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06fe868e600b6c4feed7681b33afa8e22bd4b4d1e94fded3e297dec9b728219 +size 4983722986 diff --git a/mp16/consolidated.09/consolidated-00003-of-00011.pth b/mp16/consolidated.09/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7bb12cfda0c1112ff6476fc6b2786bd0cf14f3fa --- /dev/null +++ b/mp16/consolidated.09/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5dc10b77eb0ce0710d51304434cbd90d0a64febb6db370c1b4658006ca1ff4 +size 4975267626 diff --git a/mp16/consolidated.09/consolidated-00004-of-00011.pth b/mp16/consolidated.09/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..de4ed3750861b741784b7d9cc97396a36b058ecf --- /dev/null +++ b/mp16/consolidated.09/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a0ec92640e73a0e5f5ebca7fec24d53b425625804cc87dafb6e4c6178dc2d0 +size 4941712784 diff --git a/mp16/consolidated.09/consolidated-00005-of-00011.pth b/mp16/consolidated.09/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..043479b6b104636eed5bfe3b8858d8658b9edab0 --- /dev/null +++ b/mp16/consolidated.09/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96db77ec3a1c3e854b8c61ece311a999a908a0d1861e6cb74436e182f35b3ff0 +size 4983722986 diff --git a/mp16/consolidated.09/consolidated-00006-of-00011.pth b/mp16/consolidated.09/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d6d1a180670ac05b6d03998ee23fd1533d916d8 --- /dev/null +++ b/mp16/consolidated.09/consolidated-00006-of-00011.pth @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9743e00d8432c4151abd5c8e5cf275e32092e5c92da599cb2a1844c7ecfd672 +size 4975267626 diff --git a/mp16/consolidated.09/consolidated-00007-of-00011.pth b/mp16/consolidated.09/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc805fc93d6b9143b12f1e1c085496798ce57d7a --- /dev/null +++ b/mp16/consolidated.09/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc59267185ae4819a88ed73b1fdc03e1ec2a1d59dc4598860bdac3a3bf635808 +size 4941712784 diff --git a/mp16/consolidated.09/consolidated-00008-of-00011.pth b/mp16/consolidated.09/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..1924efedd5fcb5378ec7a7593acaa5a903956145 --- /dev/null +++ b/mp16/consolidated.09/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be568c6ea82436b176b97efe14016ddf72dc35fe23442e8101b219e4060d9f59 +size 4983722986 diff --git a/mp16/consolidated.09/consolidated-00009-of-00011.pth b/mp16/consolidated.09/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..65e4a9953a9f1f5041effe9ced983e6bbe40f98e --- /dev/null +++ b/mp16/consolidated.09/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa640dbdf725ad293e4f0ac6535d51ecc11c22aec3c48b5199ebdb7e36d7bd43 +size 4975267754 diff --git a/mp16/consolidated.09/consolidated-00010-of-00011.pth b/mp16/consolidated.09/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..369ad62e09ecf776d273f866916fee5436cbf6be --- /dev/null +++ b/mp16/consolidated.09/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f3f86225433e0b6e4eec10c6ad0da374c8534bf32dea51c88cfed033f22d57 +size 4941712912 diff --git 
a/mp16/consolidated.09/consolidated-00011-of-00011.pth b/mp16/consolidated.09/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ac868c89028a9b029cb641bc56d6001345abd29 --- /dev/null +++ b/mp16/consolidated.09/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312c56b2db55dd092e9e91ae0bdada47e73bc70382f6666babc19457e60ffeb9 +size 1579985478 diff --git a/mp16/consolidated.09/consolidated.pth.index.json b/mp16/consolidated.09/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.09/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + 
"layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": 
"consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", 
+ "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + 
"layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + 
"layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + 
"layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + 
"layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + 
"layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + 
"layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + 
"layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + 
"layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + 
"layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + 
"layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + 
"layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + 
"layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + 
"layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + 
"layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + 
"layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + 
"layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + 
"layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + 
"layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + 
"layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + 
"layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + 
"layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + 
"layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + 
"layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + 
"layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + 
"layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + 
"layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + 
"layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": 
"consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + 
"layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": 
"consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + 
"layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": 
"consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + 
"layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": 
"consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + 
"layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.10/consolidated-00001-of-00011.pth b/mp16/consolidated.10/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..81d251e3d6e8b12bf3d7328dae7038cdbbaf52a0 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611cfb59617a53f560bb821975e72f589f67245b09b29efab9bcfb9100cf5a2c +size 4986210686 diff --git a/mp16/consolidated.10/consolidated-00002-of-00011.pth b/mp16/consolidated.10/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f49cd45b63d56f74f17ea26136c09ad976e2ade2 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a87cfb8670fa3b152ee350cd694d6fe62a9e00ec71b8d4b46fbd9f6bbd55103 +size 4983722986 diff --git a/mp16/consolidated.10/consolidated-00003-of-00011.pth b/mp16/consolidated.10/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2b5df84278d1d57b246348f1b3586c20efac19e --- /dev/null +++ b/mp16/consolidated.10/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8ea8b0d7383aaf5fb4672c943b0959a82501bc12797d17040e0fd69872bdcd +size 4975267626 diff --git a/mp16/consolidated.10/consolidated-00004-of-00011.pth b/mp16/consolidated.10/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..370e04dc2770483594afcc89d2444342fcafaa28 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a93afab7970804d412c45501bef2a51a77b98e67c53d4aa4544d37f5f18d9160 +size 4941712784 diff --git a/mp16/consolidated.10/consolidated-00005-of-00011.pth b/mp16/consolidated.10/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9140220681d38951780ba6d0efcba51a709ce90 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc00d2aeff3d562302bc71f490209c912ac3fbbb4b1f220658efbe73d342e5ec +size 4983722986 diff --git a/mp16/consolidated.10/consolidated-00006-of-00011.pth b/mp16/consolidated.10/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..872aca4acd0eff0a67ad3a5cec7ae179e1b3f486 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15358f7a44282fd6362dd64e85bd0c9b566d6268aeece008c1a869dcc16122bf +size 4975267626 diff --git a/mp16/consolidated.10/consolidated-00007-of-00011.pth b/mp16/consolidated.10/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..33186300d6d1d512557b80832178da13ccd41f11 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf53c1759772e856d30b055699061eb17008d383143a86b76ab7dbfe1ba4f1c7 +size 4941712784 diff --git a/mp16/consolidated.10/consolidated-00008-of-00011.pth b/mp16/consolidated.10/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..81ee49666367080de1bb7d302f773cd653a4ee1d --- /dev/null +++ b/mp16/consolidated.10/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482c59b21c44e361e983c81aa3ed00e0cae6763da6b2d3f89873d597f7c708f5 +size 4983722986 diff --git a/mp16/consolidated.10/consolidated-00009-of-00011.pth 
b/mp16/consolidated.10/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6bef6eec1fa095037313acc96b3848ec22b3d5ee --- /dev/null +++ b/mp16/consolidated.10/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c297fa53ec07c45bd6ec745b35c6abef5c4a67265bd40cef01e65cb1aed4006 +size 4975267754 diff --git a/mp16/consolidated.10/consolidated-00010-of-00011.pth b/mp16/consolidated.10/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d05a3fad8842dd62c537861da57cfa6e227e681 --- /dev/null +++ b/mp16/consolidated.10/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918bcd64a3bb549b3b17aead2c6d76b629399634e01da0ed8adc940d44797cf0 +size 4941712912 diff --git a/mp16/consolidated.10/consolidated-00011-of-00011.pth b/mp16/consolidated.10/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b257d209248cfca54e8f180cbf2d243db75b056c --- /dev/null +++ b/mp16/consolidated.10/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756a6891aab458bf556cec687b3d3491f45ff2c1970f92f9c0ec7f33eb82fcff +size 1579985478 diff --git a/mp16/consolidated.10/consolidated.pth.index.json b/mp16/consolidated.10/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.10/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": 
"consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + 
"layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + 
"layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + 
"layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + 
"layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + 
"layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + 
"layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + 
"layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + 
"layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + 
"layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + 
"layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + 
"layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + 
"layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + 
"layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + 
"layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + 
"layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + 
"layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + 
"layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + 
"layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + 
"layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + 
"layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + 
"layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + 
"layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + 
"layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + 
"layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + 
"layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + 
"layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + 
"layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + 
"layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + 
"layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": 
"consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + 
"layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": 
"consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + 
"layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": 
"consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + 
"layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": 
"consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + 
"layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.11/consolidated-00001-of-00011.pth b/mp16/consolidated.11/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c25c80304ac8eebaa1f31cf492fb3d74428e69e --- /dev/null +++ b/mp16/consolidated.11/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7e23b1679941b7879b5b07aba1823c7d4e5392977e0bf88aff1b9732c6042c +size 4986210686 diff --git a/mp16/consolidated.11/consolidated-00002-of-00011.pth b/mp16/consolidated.11/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..1dceccaf20508a2eb4cdce9bafbb7c03dfed4bb6 --- /dev/null +++ b/mp16/consolidated.11/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a745f4027a0f408af6ae8827f1223851b1a283bc73af564b8b24bbdc371d9afa +size 4983722986 diff --git 
a/mp16/consolidated.11/consolidated-00003-of-00011.pth b/mp16/consolidated.11/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..e67972220257b52e66ebb8dd62931b01f2fde694 --- /dev/null +++ b/mp16/consolidated.11/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25b7f6d1a6856d0d51cbc339d059323677a6023ea0e4dc091e2b649bd59061d +size 4975267626 diff --git a/mp16/consolidated.11/consolidated-00004-of-00011.pth b/mp16/consolidated.11/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5d0c0fab80d942defe9819a3a0e7f79df629b2d --- /dev/null +++ b/mp16/consolidated.11/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db590f378afdccf03088e4fd996491446c39e3aa8f185a27273deae89955d15 +size 4941712784 diff --git a/mp16/consolidated.11/consolidated-00005-of-00011.pth b/mp16/consolidated.11/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d9490f5d7d3203d6164bb9bbf116f2b10a1327f --- /dev/null +++ b/mp16/consolidated.11/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd18a7dd29d04a99d5ff7cafa1eb5d312510fba20262cb1ae8f28d60a7071fcc +size 4983722986 diff --git a/mp16/consolidated.11/consolidated-00006-of-00011.pth b/mp16/consolidated.11/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..b979974594df2d789d53f06a6e50045f0ba7bc54 --- /dev/null +++ b/mp16/consolidated.11/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f06197943ef912d1c19aef7622cfa5d85376937b129edbd6baa5879d6e4bbf +size 4975267626 diff --git a/mp16/consolidated.11/consolidated-00007-of-00011.pth b/mp16/consolidated.11/consolidated-00007-of-00011.pth new file mode 100644 index 
0000000000000000000000000000000000000000..85f73574ee46de9db8900a921247f391ea7bbfec --- /dev/null +++ b/mp16/consolidated.11/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60f0b2ac5b7e75032150d1f9306a407abbf689d0c073c218c0d0cd3d352d1de +size 4941712784 diff --git a/mp16/consolidated.11/consolidated-00008-of-00011.pth b/mp16/consolidated.11/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f084624a32f229fb44c1985458b2da7074e10be --- /dev/null +++ b/mp16/consolidated.11/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c602156ce2c4d0211f5cd35142428ff3f59e063b7d8557c9a84aa764f5a322 +size 4983722986 diff --git a/mp16/consolidated.11/consolidated-00009-of-00011.pth b/mp16/consolidated.11/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfe247a6b77794a886485d43fb44312b14234a44 --- /dev/null +++ b/mp16/consolidated.11/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea71f1ff3f4581ab41890f16475c1985b1b83d8fade92fadf27057d7dec2371 +size 4975267754 diff --git a/mp16/consolidated.11/consolidated-00010-of-00011.pth b/mp16/consolidated.11/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..22ce5e169e34ba1e42deceb7d1ad3b1c99382bf0 --- /dev/null +++ b/mp16/consolidated.11/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b984b06697e8238a30ba3ed2dee87cd7e86120c686fb4fb2f1474db26f993567 +size 4941712912 diff --git a/mp16/consolidated.11/consolidated-00011-of-00011.pth b/mp16/consolidated.11/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c94fbae1ef6302f8c8b3a02cb26f5e068a65d5c --- /dev/null +++ b/mp16/consolidated.11/consolidated-00011-of-00011.pth @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d5cbb2a3c9fc9cccb8a2680607150d90b1950042ef67f04296073c91dd8e86c +size 1579985478 diff --git a/mp16/consolidated.11/consolidated.pth.index.json b/mp16/consolidated.11/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.11/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + 
"layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": 
"consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + 
"layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + 
"layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + 
"layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + 
"layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + 
"layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + 
"layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + 
"layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + 
"layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + 
"layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + 
"layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + 
"layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + 
"layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + 
"layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + 
"layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + 
"layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + 
"layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + 
"layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + 
"layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + 
"layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + 
"layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + 
"layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + 
"layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + 
"layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + 
"layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + 
"layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + 
"layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + 
"layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": 
"consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": 
"consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + 
"layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": 
"consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.12/consolidated-00001-of-00011.pth 
b/mp16/consolidated.12/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6835a4f53bc00e6c58deaa92ddfd6046f6a36920 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2220e5cfa9899f53510e834163a16056a82070d5ef4378c41b4085c2df2f9b09 +size 4986210686 diff --git a/mp16/consolidated.12/consolidated-00002-of-00011.pth b/mp16/consolidated.12/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3660ceab6787c2d77b95921aa52aef841af2f82 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f2837f6a7466cd99a6ee7bca1ecf10e4b9f4e1949cdd445cc61aa2af064768 +size 4983722986 diff --git a/mp16/consolidated.12/consolidated-00003-of-00011.pth b/mp16/consolidated.12/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..53c70b974e61dbaa0053291753f3c0cd253e3f15 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0918d6360caeaf273bd9affaf7d9d0782104d76e224576e962f7720bf699bec +size 4975267626 diff --git a/mp16/consolidated.12/consolidated-00004-of-00011.pth b/mp16/consolidated.12/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6123b19f8d815300f18db740d425fd5dd05d01e2 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8e7260bbf0f90ba89ced1d784bf97a577430e5c0d578d90b28b3d2eb522413 +size 4941712784 diff --git a/mp16/consolidated.12/consolidated-00005-of-00011.pth b/mp16/consolidated.12/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21754c8fdf440211f4f3bccfb2da318c837b91d 
--- /dev/null +++ b/mp16/consolidated.12/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94ef34e2b0cf51d8424e0ce822c21eb326f3ee6c25a77091d05152f570a8606 +size 4983722986 diff --git a/mp16/consolidated.12/consolidated-00006-of-00011.pth b/mp16/consolidated.12/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..87e70c5e2a4debc67e402e6d6f51a9bc41044f14 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950871e776c3e426ac9df782abf646c1c6cfab2f6a980128db66518f0ab225ea +size 4975267626 diff --git a/mp16/consolidated.12/consolidated-00007-of-00011.pth b/mp16/consolidated.12/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e0b0056e47721c736abd374908b5158741c5a2b --- /dev/null +++ b/mp16/consolidated.12/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290da63e9e64639a6c089e37607539d0210c0d476c4d5ca3e2c1abd2ebab89d0 +size 4941712784 diff --git a/mp16/consolidated.12/consolidated-00008-of-00011.pth b/mp16/consolidated.12/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..24206c94d1154ab2f12d97032d8c304bad290559 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:105d8e7fa038a0db330e5b12e1b1298318da7101c71cd9ff7b5ba363ed2f37b3 +size 4983722986 diff --git a/mp16/consolidated.12/consolidated-00009-of-00011.pth b/mp16/consolidated.12/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e23aff2a0c933699e53227af11db61a66ca931b --- /dev/null +++ b/mp16/consolidated.12/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:abdce26cac8dc104a515f534ccc0e7a78e54b3ffb371a73c430618f24551c55b +size 4975267754 diff --git a/mp16/consolidated.12/consolidated-00010-of-00011.pth b/mp16/consolidated.12/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..85ec868c75766a95aee6c5a30ef98afa3079ea50 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382c719e77ef864559631296fdb611999f5bcb8ff93822499bc1f85e06af82f9 +size 4941712912 diff --git a/mp16/consolidated.12/consolidated-00011-of-00011.pth b/mp16/consolidated.12/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f23d300d03bc653e7daa57e1c2b582a388ef65f2 --- /dev/null +++ b/mp16/consolidated.12/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c133ca15d60badb0031f103a2429b68fc5ddf51500ec519d85c9cb51f4cd5c6e +size 1579985478 diff --git a/mp16/consolidated.12/consolidated.pth.index.json b/mp16/consolidated.12/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.12/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": 
"consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + 
"layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": 
"consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + 
"layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + 
"layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + 
"layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + 
"layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + 
"layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + 
"layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + 
"layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + 
"layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + 
"layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + 
"layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + 
"layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + 
"layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + 
"layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + 
"layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + 
"layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + 
"layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + 
"layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + 
"layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + 
"layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + 
"layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + 
"layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + 
"layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + 
"layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + 
"layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + 
"layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + 
"layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + 
"layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + 
"layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + 
"layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": 
"consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + 
"layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": 
"consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + 
"layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + 
"layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.13/consolidated-00001-of-00011.pth b/mp16/consolidated.13/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd61595936eb592b8d9715dc5387dc600da55816 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e265311f5a37f0ab7d0570b1045fcc6e640be73097169a577b7f2070c2fc048 +size 4986210686 diff --git a/mp16/consolidated.13/consolidated-00002-of-00011.pth b/mp16/consolidated.13/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..bad456a486841051d4f07e9539782b0c4768f9ea --- /dev/null +++ b/mp16/consolidated.13/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57b8c546fca7320a28461afeaa98098c93a1864274994c957dbc8403bda5c26 +size 4983722986 diff --git a/mp16/consolidated.13/consolidated-00003-of-00011.pth b/mp16/consolidated.13/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..896d13fe2584bc6b8f8aff3b087294b71223466e --- /dev/null +++ 
b/mp16/consolidated.13/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b1e069e514ba1f8b4d8bd58fee10b5ef5b5a9ceb246c299812cb30a4df3d18 +size 4975267626 diff --git a/mp16/consolidated.13/consolidated-00004-of-00011.pth b/mp16/consolidated.13/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b75025fbbdad5fef1e39dc27592caee76ca2942 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c15223a0422ee8e294275309bc10d8a718c22ac1b71cd58f2ba4a1e8fadc1e +size 4941712784 diff --git a/mp16/consolidated.13/consolidated-00005-of-00011.pth b/mp16/consolidated.13/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc50aff791a6d43caac5a7c07c63a66a8b1a5384 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b850dd38b78dc5bcca5c28bd079306e57e2300aefa1427ee57791068d8da2c1 +size 4983722986 diff --git a/mp16/consolidated.13/consolidated-00006-of-00011.pth b/mp16/consolidated.13/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e75d8d2bd24ff8d2d15fa9fd09d76010abeab48 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aaa722ea36da7d1b91a4ae3f1123ea042ef7eb83e79b663dd7000de71b01cc2 +size 4975267626 diff --git a/mp16/consolidated.13/consolidated-00007-of-00011.pth b/mp16/consolidated.13/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6caea189bff59b47e56789be1cea18ec8e6b4aac --- /dev/null +++ b/mp16/consolidated.13/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b2d5bd824d8635c29f775f78cc24cff41c0ba8c616fa797fb4c620b3ef0ee090 +size 4941712784 diff --git a/mp16/consolidated.13/consolidated-00008-of-00011.pth b/mp16/consolidated.13/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..db423086743589618042a93e51d76f8a587edc67 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b14b6ed49d4a7913709a562d8d04b039f5e94fce0cc50faf5769db447e1fa110 +size 4983722986 diff --git a/mp16/consolidated.13/consolidated-00009-of-00011.pth b/mp16/consolidated.13/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc55ae0c346acb2b9d7c24689859178f017bace6 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ce26a5f0e0222444583edf022419237f03651c04e921c52bd4f08539eb6830 +size 4975267754 diff --git a/mp16/consolidated.13/consolidated-00010-of-00011.pth b/mp16/consolidated.13/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..186cadf150f58a9b3109c385dce386c624bf1a71 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0eaf02cfe6a8ac8e72ffc16cfd0368ef5a9a6d6e7dce1a9321a03fd370052e8 +size 4941712912 diff --git a/mp16/consolidated.13/consolidated-00011-of-00011.pth b/mp16/consolidated.13/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..58603e4b8deaaec5442cd4509feac67fb4483222 --- /dev/null +++ b/mp16/consolidated.13/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17f2b5ad33145b790a060de0567f5eebe91d5917940166d0da03767d94f2346 +size 1579985478 diff --git a/mp16/consolidated.13/consolidated.pth.index.json 
b/mp16/consolidated.13/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.13/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + 
"layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": 
"consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": 
"consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": 
"consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": 
"consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": 
"consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": 
"consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": 
"consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": 
"consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": 
"consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": 
"consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": 
"consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": 
"consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": 
"consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": 
"consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": 
"consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": 
"consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": 
"consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": 
"consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": 
"consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": 
"consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": 
"consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": 
"consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": 
"consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + 
"layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": 
"consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + 
"layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": 
"consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + 
"layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": 
"consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.14/consolidated-00001-of-00011.pth b/mp16/consolidated.14/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ed5a9b92b12eb499d2f5d45086054d8ac4c3dbb --- /dev/null +++ b/mp16/consolidated.14/consolidated-00001-of-00011.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808657367390fca59a58c10c07512f1f16c13d1695d1cc6f5127e30d07cffc2a +size 4986210686 diff --git a/mp16/consolidated.14/consolidated-00002-of-00011.pth b/mp16/consolidated.14/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1e0e8f43bc8a0feede9a5270bea4a6326069e5b --- /dev/null +++ b/mp16/consolidated.14/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:698fcdd16017dcd834401127d8733a848b30ea85576d20c7e8e108da4773f47f +size 4983722986 diff --git a/mp16/consolidated.14/consolidated-00003-of-00011.pth b/mp16/consolidated.14/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1a4d1663080b8f21bcf68372e5c8e655fa78f98 --- /dev/null +++ b/mp16/consolidated.14/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844ea124931a6108850a500f333161c4467358a52f993d201a7b255c09fcc8bb +size 4975267626 diff --git a/mp16/consolidated.14/consolidated-00004-of-00011.pth b/mp16/consolidated.14/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..f03be43eb830a10ac669051653f1f8a555fa19de --- /dev/null +++ b/mp16/consolidated.14/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de8cab3627acd12e44b5ccf6c2331d9b1ac28c3a8bfedfb4592dfcbf8ad24c6 +size 4941712784 diff --git a/mp16/consolidated.14/consolidated-00005-of-00011.pth b/mp16/consolidated.14/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c68a54c1f90692768d14a4b89454bb4cd18941d --- /dev/null +++ b/mp16/consolidated.14/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3845bb4a324c4dce0f09113c9f320e9cd5fd1021a8436d78faf697fc175b479 +size 4983722986 diff --git 
a/mp16/consolidated.14/consolidated-00006-of-00011.pth b/mp16/consolidated.14/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..a461abc00e02330da615c48eb72407cf9f775a57 --- /dev/null +++ b/mp16/consolidated.14/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4b771d471c2390544fd095e428a6e749cdaee4db0cefb6a5e23f665ff5fb6c +size 4975267626 diff --git a/mp16/consolidated.14/consolidated-00007-of-00011.pth b/mp16/consolidated.14/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9a5d48b41765dd1fe54a4e0709bee79078b877d --- /dev/null +++ b/mp16/consolidated.14/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ea74813d14a1b6bbf93f5c366e0b3eddc1774d949636ae0997a65478ed9f6c1 +size 4941712784 diff --git a/mp16/consolidated.14/consolidated-00008-of-00011.pth b/mp16/consolidated.14/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c5b0ecadc638db3e6ebc41df7e9242104452cf0 --- /dev/null +++ b/mp16/consolidated.14/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28d6ca53f01df7c657c6a0845a4ff4c503695fcb5b35981e495d45851e075f7 +size 4983722986 diff --git a/mp16/consolidated.14/consolidated-00009-of-00011.pth b/mp16/consolidated.14/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3d9a6dd6c31d501d76f6e57fb00ed237c1aa0e1 --- /dev/null +++ b/mp16/consolidated.14/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b23a22939b7aea804ca72b41cf90498e1f18518765083d64973421b3aa90563 +size 4975267754 diff --git a/mp16/consolidated.14/consolidated-00010-of-00011.pth b/mp16/consolidated.14/consolidated-00010-of-00011.pth new file mode 100644 index 
0000000000000000000000000000000000000000..430c85af82a13f7a409fd45d0a9c23af85c4cfd1 --- /dev/null +++ b/mp16/consolidated.14/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a02b7569f44f61e0ede13ba9c8e4b70720a42e8bce4539286835c98b7abf61a +size 4941712912 diff --git a/mp16/consolidated.14/consolidated-00011-of-00011.pth b/mp16/consolidated.14/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e343531399d7f019ff4defda487ea86d9696a68 --- /dev/null +++ b/mp16/consolidated.14/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b7a121475c03a1f465014bdd4fe6e2519268632cebca65761dbd3c66866bdf +size 1579985478 diff --git a/mp16/consolidated.14/consolidated.pth.index.json b/mp16/consolidated.14/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.14/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + "tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + 
"layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": 
"consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + 
"layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": 
"consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": 
"consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": 
"consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": 
"consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": 
"consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": 
"consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": 
"consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": 
"consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": 
"consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": 
"consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": 
"consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": 
"consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": 
"consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": 
"consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": 
"consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": 
"consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": 
"consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": 
"consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": 
"consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": 
"consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": 
"consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": 
"consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": 
"consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": 
"consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": 
"consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": 
"consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + 
"layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": 
"consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + 
"layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": 
"consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + 
"layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": 
"consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + 
"layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": 
"consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/consolidated.15/consolidated-00001-of-00011.pth b/mp16/consolidated.15/consolidated-00001-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfbb6a2a371f678d88dd1942eb11ae793564df4d --- /dev/null +++ b/mp16/consolidated.15/consolidated-00001-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999fdd2367a6c12ad61e24cfcaaff5c08fbe71f7bdd793647a6e79c4faf1ed6c +size 4986210686 diff --git a/mp16/consolidated.15/consolidated-00002-of-00011.pth b/mp16/consolidated.15/consolidated-00002-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..2768a08623dc201f5329b7ac64bd27f9f6d42034 --- /dev/null +++ b/mp16/consolidated.15/consolidated-00002-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:532bee5113544ab7cc05a7156f9d5abf3c3c427b04e21ffe9455854dc75c793b +size 4983722986 diff --git a/mp16/consolidated.15/consolidated-00003-of-00011.pth b/mp16/consolidated.15/consolidated-00003-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ced9fa093829e5fa5c96f8eebb7487e29b37bcd --- /dev/null +++ b/mp16/consolidated.15/consolidated-00003-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae57152efd43b59c894efa09436b2e3591143e4b56faabea3dc51e671f7e8d4 +size 4975267626 diff --git a/mp16/consolidated.15/consolidated-00004-of-00011.pth 
b/mp16/consolidated.15/consolidated-00004-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..c81e52e3e1a888bf6725b2eae5b18ecb27f9aa6f --- /dev/null +++ b/mp16/consolidated.15/consolidated-00004-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90885d6e64618b705f226e00aca15d77879b845a58a30c2e6e82f3e463287f27 +size 4941712784 diff --git a/mp16/consolidated.15/consolidated-00005-of-00011.pth b/mp16/consolidated.15/consolidated-00005-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..7119867ad4f9673f1e71c99d0b270b3cd6682ace --- /dev/null +++ b/mp16/consolidated.15/consolidated-00005-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc532a615a4ec9c5517db5c009378a5844e9e03b335f441d06282f0195b600f4 +size 4983722986 diff --git a/mp16/consolidated.15/consolidated-00006-of-00011.pth b/mp16/consolidated.15/consolidated-00006-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea44799eb05aa90099d5953e2257d8fe47a80b9b --- /dev/null +++ b/mp16/consolidated.15/consolidated-00006-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e693813c62233dc12846ac2fde359536a76cb5c714ae55c18f22b1533032aa8 +size 4975267626 diff --git a/mp16/consolidated.15/consolidated-00007-of-00011.pth b/mp16/consolidated.15/consolidated-00007-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..6259be69b9986939089dff3ca9020b4ffbd2ba40 --- /dev/null +++ b/mp16/consolidated.15/consolidated-00007-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8dc0619ae2516ca21b9c97ccca52386f3eed608b096baca079212f8b8d8eb03 +size 4941712784 diff --git a/mp16/consolidated.15/consolidated-00008-of-00011.pth b/mp16/consolidated.15/consolidated-00008-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6e63a0c6e40d59a3cedd604a13032acd8ab2095 
--- /dev/null +++ b/mp16/consolidated.15/consolidated-00008-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90faa5ce037846368ebb175ddbd9e953e280e347cc4e319dd0138a49f69ed811 +size 4983722986 diff --git a/mp16/consolidated.15/consolidated-00009-of-00011.pth b/mp16/consolidated.15/consolidated-00009-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..e49731cb7b4f27d1c43b873e8c9a3707d0b75679 --- /dev/null +++ b/mp16/consolidated.15/consolidated-00009-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47a99e550fcb1210f7b413c6f9b46e00d4edea94285a064406b072aba3ec514 +size 4975267754 diff --git a/mp16/consolidated.15/consolidated-00010-of-00011.pth b/mp16/consolidated.15/consolidated-00010-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8695c5340b4a71856365cbd390f27f59e0f0faf --- /dev/null +++ b/mp16/consolidated.15/consolidated-00010-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4005308a333470fe5be628c40a55e558fe11ee2fee5ce87a359edcdd012d76 +size 4941712912 diff --git a/mp16/consolidated.15/consolidated-00011-of-00011.pth b/mp16/consolidated.15/consolidated-00011-of-00011.pth new file mode 100644 index 0000000000000000000000000000000000000000..073bf86a3f5ba43a5f5e11403cb425a234be6353 --- /dev/null +++ b/mp16/consolidated.15/consolidated-00011-of-00011.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c4575096cb1cc39bb6ca57ce9465091933172a490e534683d72d2457533af9 +size 1579985478 diff --git a/mp16/consolidated.15/consolidated.pth.index.json b/mp16/consolidated.15/consolidated.pth.index.json new file mode 100644 index 0000000000000000000000000000000000000000..69268f5c698c7f2e827676420b12761c8f1be069 --- /dev/null +++ b/mp16/consolidated.15/consolidated.pth.index.json @@ -0,0 +1,1144 @@ +{ + "metadata": { + "total_size": 51267928064 + }, + "weight_map": { + 
"tok_embeddings.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.0.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.0.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.1.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.1.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.2.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.2.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.2.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wq.weight": 
"consolidated-00001-of-00011.pth", + "layers.3.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.3.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.3.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.3.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.4.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.4.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.4.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.5.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.5.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.5.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wk.weight": "consolidated-00001-of-00011.pth", + 
"layers.6.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.6.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.6.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.6.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.7.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.7.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.7.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.8.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.8.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.8.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention.wo.weight": 
"consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.9.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.9.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.9.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.10.feed_forward.w2.weight": "consolidated-00001-of-00011.pth", + "layers.10.attention_norm.weight": "consolidated-00001-of-00011.pth", + "layers.10.ffn_norm.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wq.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wk.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wv.weight": "consolidated-00001-of-00011.pth", + "layers.11.attention.wo.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w1.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w3.weight": "consolidated-00001-of-00011.pth", + "layers.11.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.11.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.11.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w1.weight": 
"consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.12.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.12.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.12.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.13.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.13.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.13.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.14.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.14.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.14.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w3.weight": 
"consolidated-00002-of-00011.pth", + "layers.15.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.15.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.15.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.16.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.16.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.16.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.17.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.17.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.17.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.18.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.18.feed_forward.w2.weight": 
"consolidated-00002-of-00011.pth", + "layers.18.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.18.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.19.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.19.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.19.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.20.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.20.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.20.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.21.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.21.attention_norm.weight": 
"consolidated-00002-of-00011.pth", + "layers.21.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.22.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.22.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.22.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention.wo.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w1.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w3.weight": "consolidated-00002-of-00011.pth", + "layers.23.feed_forward.w2.weight": "consolidated-00002-of-00011.pth", + "layers.23.attention_norm.weight": "consolidated-00002-of-00011.pth", + "layers.23.ffn_norm.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wq.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wk.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wv.weight": "consolidated-00002-of-00011.pth", + "layers.24.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.24.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.24.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.24.ffn_norm.weight": 
"consolidated-00003-of-00011.pth", + "layers.25.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.25.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.25.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.25.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.26.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.26.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.26.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.27.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.27.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.27.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wq.weight": 
"consolidated-00003-of-00011.pth", + "layers.28.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.28.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.28.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.28.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.29.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.29.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.29.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.30.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.30.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.30.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wk.weight": 
"consolidated-00003-of-00011.pth", + "layers.31.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.31.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.31.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.31.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.32.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.32.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.32.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.33.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.33.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.33.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention.wv.weight": 
"consolidated-00003-of-00011.pth", + "layers.34.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.34.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.34.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.34.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w3.weight": "consolidated-00003-of-00011.pth", + "layers.35.feed_forward.w2.weight": "consolidated-00003-of-00011.pth", + "layers.35.attention_norm.weight": "consolidated-00003-of-00011.pth", + "layers.35.ffn_norm.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wq.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wk.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wv.weight": "consolidated-00003-of-00011.pth", + "layers.36.attention.wo.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w1.weight": "consolidated-00003-of-00011.pth", + "layers.36.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.36.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.36.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.36.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention.wo.weight": 
"consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.37.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.37.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.37.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.38.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.38.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.38.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.39.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.39.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.39.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w1.weight": 
"consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.40.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.40.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.40.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.41.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.41.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.41.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.42.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.42.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.42.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w3.weight": 
"consolidated-00004-of-00011.pth", + "layers.43.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.43.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.43.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.44.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.44.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.44.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.45.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.45.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.45.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.46.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.46.feed_forward.w2.weight": 
"consolidated-00004-of-00011.pth", + "layers.46.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.46.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.47.feed_forward.w2.weight": "consolidated-00004-of-00011.pth", + "layers.47.attention_norm.weight": "consolidated-00004-of-00011.pth", + "layers.47.ffn_norm.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wq.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wk.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wv.weight": "consolidated-00004-of-00011.pth", + "layers.48.attention.wo.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w1.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w3.weight": "consolidated-00004-of-00011.pth", + "layers.48.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.48.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.48.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.49.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.49.attention_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.49.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.50.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.50.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.50.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.51.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.51.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.51.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.52.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.52.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.52.ffn_norm.weight": 
"consolidated-00005-of-00011.pth", + "layers.53.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.53.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.53.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.53.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.54.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.54.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.54.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.55.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.55.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.55.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wq.weight": 
"consolidated-00005-of-00011.pth", + "layers.56.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.56.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.56.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.56.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.57.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.57.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.57.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.58.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.58.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.58.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wk.weight": 
"consolidated-00005-of-00011.pth", + "layers.59.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.59.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.59.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.59.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention.wo.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w1.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w3.weight": "consolidated-00005-of-00011.pth", + "layers.60.feed_forward.w2.weight": "consolidated-00005-of-00011.pth", + "layers.60.attention_norm.weight": "consolidated-00005-of-00011.pth", + "layers.60.ffn_norm.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wq.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wk.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wv.weight": "consolidated-00005-of-00011.pth", + "layers.61.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.61.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.61.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.61.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention.wv.weight": 
"consolidated-00006-of-00011.pth", + "layers.62.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.62.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.62.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.62.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.63.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.63.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.63.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.64.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.64.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.64.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention.wo.weight": 
"consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.65.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.65.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.65.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.66.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.66.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.66.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.67.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.67.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.67.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w1.weight": 
"consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.68.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.68.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.68.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.69.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.69.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.69.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.70.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.70.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.70.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w3.weight": 
"consolidated-00006-of-00011.pth", + "layers.71.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.71.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.71.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w3.weight": "consolidated-00006-of-00011.pth", + "layers.72.feed_forward.w2.weight": "consolidated-00006-of-00011.pth", + "layers.72.attention_norm.weight": "consolidated-00006-of-00011.pth", + "layers.72.ffn_norm.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wq.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wk.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wv.weight": "consolidated-00006-of-00011.pth", + "layers.73.attention.wo.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w1.weight": "consolidated-00006-of-00011.pth", + "layers.73.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.73.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.73.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.73.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.74.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.74.feed_forward.w2.weight": 
"consolidated-00007-of-00011.pth", + "layers.74.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.74.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.75.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.75.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.75.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.76.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.76.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.76.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.77.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.77.attention_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.77.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.78.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.78.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.78.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.79.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.79.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.79.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.80.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.80.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.80.ffn_norm.weight": 
"consolidated-00007-of-00011.pth", + "layers.81.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.81.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.81.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.81.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.82.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.82.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.82.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.83.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.83.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.83.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wq.weight": 
"consolidated-00007-of-00011.pth", + "layers.84.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.84.feed_forward.w2.weight": "consolidated-00007-of-00011.pth", + "layers.84.attention_norm.weight": "consolidated-00007-of-00011.pth", + "layers.84.ffn_norm.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wq.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wk.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wv.weight": "consolidated-00007-of-00011.pth", + "layers.85.attention.wo.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w1.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w3.weight": "consolidated-00007-of-00011.pth", + "layers.85.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.85.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.85.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.86.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.86.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.86.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wk.weight": 
"consolidated-00008-of-00011.pth", + "layers.87.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.87.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.87.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.87.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.88.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.88.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.88.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.89.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.89.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.89.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention.wv.weight": 
"consolidated-00008-of-00011.pth", + "layers.90.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.90.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.90.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.90.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.91.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.91.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.91.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.92.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.92.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.92.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention.wo.weight": 
"consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.93.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.93.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.93.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.94.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.94.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.94.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.95.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.95.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.95.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w1.weight": 
"consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.96.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.96.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.96.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention.wo.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w1.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w3.weight": "consolidated-00008-of-00011.pth", + "layers.97.feed_forward.w2.weight": "consolidated-00008-of-00011.pth", + "layers.97.attention_norm.weight": "consolidated-00008-of-00011.pth", + "layers.97.ffn_norm.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wq.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wk.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wv.weight": "consolidated-00008-of-00011.pth", + "layers.98.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.98.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.98.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.98.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w3.weight": 
"consolidated-00009-of-00011.pth", + "layers.99.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.99.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.99.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.100.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.100.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.100.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.101.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.101.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.101.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.102.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + 
"layers.102.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.102.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.102.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.103.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.103.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.103.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.104.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.104.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.104.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.105.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.105.feed_forward.w2.weight": 
"consolidated-00009-of-00011.pth", + "layers.105.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.105.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.106.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.106.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.106.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.107.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.107.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.107.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.108.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.108.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + 
"layers.108.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.108.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w3.weight": "consolidated-00009-of-00011.pth", + "layers.109.feed_forward.w2.weight": "consolidated-00009-of-00011.pth", + "layers.109.attention_norm.weight": "consolidated-00009-of-00011.pth", + "layers.109.ffn_norm.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wq.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wk.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wv.weight": "consolidated-00009-of-00011.pth", + "layers.110.attention.wo.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w1.weight": "consolidated-00009-of-00011.pth", + "layers.110.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.110.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.110.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.110.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.111.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.111.attention_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.111.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.112.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.112.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.112.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.113.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.113.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.113.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.114.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.114.attention_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.114.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.115.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.115.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.115.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.116.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.116.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.116.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.117.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.117.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.117.ffn_norm.weight": 
"consolidated-00010-of-00011.pth", + "layers.118.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.118.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.118.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.118.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.119.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.119.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.119.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.120.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.120.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.120.ffn_norm.weight": "consolidated-00010-of-00011.pth", + 
"layers.121.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.121.feed_forward.w2.weight": "consolidated-00010-of-00011.pth", + "layers.121.attention_norm.weight": "consolidated-00010-of-00011.pth", + "layers.121.ffn_norm.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wq.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wk.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wv.weight": "consolidated-00010-of-00011.pth", + "layers.122.attention.wo.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w1.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w3.weight": "consolidated-00010-of-00011.pth", + "layers.122.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.122.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.122.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.123.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.123.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.123.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wq.weight": 
"consolidated-00011-of-00011.pth", + "layers.124.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.124.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.124.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.124.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wq.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wk.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wv.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention.wo.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w1.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w3.weight": "consolidated-00011-of-00011.pth", + "layers.125.feed_forward.w2.weight": "consolidated-00011-of-00011.pth", + "layers.125.attention_norm.weight": "consolidated-00011-of-00011.pth", + "layers.125.ffn_norm.weight": "consolidated-00011-of-00011.pth", + "norm.weight": "consolidated-00011-of-00011.pth", + "output.weight": "consolidated-00011-of-00011.pth" + } +} \ No newline at end of file diff --git a/mp16/params.json b/mp16/params.json new file mode 100644 index 0000000000000000000000000000000000000000..6cf4fb0dbcaf4499ac0906e3a3ef2ed0f3f58a9c --- /dev/null +++ b/mp16/params.json @@ -0,0 +1,12 @@ +{ + "dim": 16384, + "ffn_dim_multiplier": 1.2, + "multiple_of": 4096, + "n_heads": 128, + "n_kv_heads": 16, + "n_layers": 126, + "norm_eps": 1e-05, + "rope_theta": 500000.0, + "use_scaled_rope": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/mp16/tokenizer.model b/mp16/tokenizer.model new file mode 100644 index 
0000000000000000000000000000000000000000..a097ce5a06fce0fa3d685a8cfb175cef243dfde9 --- /dev/null +++ b/mp16/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e9d31979e92ab929cd544440f129d9ecd797b69e327f80f17e1c50d5551b55 +size 2183982