jackyoung96 committed on
Commit
1d99449
·
verified ·
1 Parent(s): 9eccc29

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/A.X_from_scratch_logo_ko_4x3.png filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2025 SK Telecom Co., Ltd. All rights reserved.
2
+
3
+ Unless otherwise stated, all files in this repository (including modified model weights
4
+ and tokenizer files) are distributed under the terms of the Apache License, Version 2.0
5
+ (the "License"). You may obtain a copy of the License at:
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software distributed under
10
+ the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11
+ ANY KIND, either express or implied. See the License for the specific language governing
12
+ permissions and limitations under the License.
13
+
14
+ ================================================================================
15
+ TRADEMARK
16
+ ================================================================================
17
+
18
+ "SK Telecom" and associated logos are trademarks of SK Telecom Co., Ltd.
19
+ This License does not grant permission to use these trademarks without prior
20
+ written consent.
21
+
22
+ ================================================================================
23
+ APACHE LICENSE 2.0
24
+ ================================================================================
25
+
26
+ Apache License
27
+ Version 2.0, January 2004
28
+ http://www.apache.org/licenses/
29
+
30
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
31
+
32
+ 1. Definitions.
33
+
34
+ "License" shall mean the terms and conditions for use, reproduction,
35
+ and distribution as defined by Sections 1 through 9 of this document.
36
+
37
+ "Licensor" shall mean the copyright owner or entity authorized by
38
+ the copyright owner that is granting the License.
39
+
40
+ "Legal Entity" shall mean the union of the acting entity and all
41
+ other entities that control, are controlled by, or are under common
42
+ control with that entity. For the purposes of this definition,
43
+ "control" means (i) the power, direct or indirect, to cause the
44
+ direction or management of such entity, whether by contract or
45
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
46
+ outstanding shares, or (iii) beneficial ownership of such entity.
47
+
48
+ "You" (or "Your") shall mean an individual or Legal Entity
49
+ exercising permissions granted by this License.
50
+
51
+ "Source" form shall mean the preferred form for making modifications,
52
+ including but not limited to software source code, documentation
53
+ source, and configuration files.
54
+
55
+ "Object" form shall mean any form resulting from mechanical
56
+ transformation or translation of a Source form, including but
57
+ not limited to compiled object code, generated documentation,
58
+ and conversions to other media types.
59
+
60
+ "Work" shall mean the work of authorship, whether in Source or
61
+ Object form, made available under the License, as indicated by a
62
+ copyright notice that is included in or attached to the work
63
+ (an example is provided in the Appendix below).
64
+
65
+ "Derivative Works" shall mean any work, whether in Source or Object
66
+ form, that is based on (or derived from) the Work and for which the
67
+ editorial revisions, annotations, elaborations, or other modifications
68
+ represent, as a whole, an original work of authorship. For the purposes
69
+ of this License, Derivative Works shall not include works that remain
70
+ separable from, or merely link (or bind by name) to the interfaces of,
71
+ the Work and Derivative Works thereof.
72
+
73
+ "Contribution" shall mean any work of authorship, including
74
+ the original version of the Work and any modifications or additions
75
+ to that Work or Derivative Works thereof, that is intentionally
76
+ submitted to Licensor for inclusion in the Work by the copyright owner
77
+ or by an individual or Legal Entity authorized to submit on behalf of
78
+ the copyright owner. For the purposes of this definition, "submitted"
79
+ means any form of electronic, verbal, or written communication sent
80
+ to the Licensor or its representatives, including but not limited to
81
+ communication on electronic mailing lists, source code control systems,
82
+ and issue tracking systems that are managed by, or on behalf of, the
83
+ Licensor for the purpose of discussing and improving the Work, but
84
+ excluding communication that is conspicuously marked or otherwise
85
+ designated in writing by the copyright owner as "Not a Contribution."
86
+
87
+ "Contributor" shall mean Licensor and any individual or Legal Entity
88
+ on behalf of whom a Contribution has been received by Licensor and
89
+ subsequently incorporated within the Work.
90
+
91
+ 2. Grant of Copyright License. Subject to the terms and conditions of
92
+ this License, each Contributor hereby grants to You a perpetual,
93
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
94
+ copyright license to reproduce, prepare Derivative Works of,
95
+ publicly display, publicly perform, sublicense, and distribute the
96
+ Work and such Derivative Works in Source or Object form.
97
+
98
+ 3. Grant of Patent License. Subject to the terms and conditions of
99
+ this License, each Contributor hereby grants to You a perpetual,
100
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
101
+ (except as stated in this section) patent license to make, have made,
102
+ use, offer to sell, sell, import, and otherwise transfer the Work,
103
+ where such license applies only to those patent claims licensable
104
+ by such Contributor that are necessarily infringed by their
105
+ Contribution(s) alone or by combination of their Contribution(s)
106
+ with the Work to which such Contribution(s) was submitted. If You
107
+ institute patent litigation against any entity (including a
108
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
109
+ or a Contribution incorporated within the Work constitutes direct
110
+ or contributory patent infringement, then any patent licenses
111
+ granted to You under this License for that Work shall terminate
112
+ as of the date such litigation is filed.
113
+
114
+ 4. Redistribution. You may reproduce and distribute copies of the
115
+ Work or Derivative Works thereof in any medium, with or without
116
+ modifications, and in Source or Object form, provided that You
117
+ meet the following conditions:
118
+
119
+ (a) You must give any other recipients of the Work or
120
+ Derivative Works a copy of this License; and
121
+
122
+ (b) You must cause any modified files to carry prominent notices
123
+ stating that You changed the files; and
124
+
125
+ (c) You must retain, in the Source form of any Derivative Works
126
+ that You distribute, all copyright, patent, trademark, and
127
+ attribution notices from the Source form of the Work,
128
+ excluding those notices that do not pertain to any part of
129
+ the Derivative Works; and
130
+
131
+ (d) If the Work includes a "NOTICE" text file as part of its
132
+ distribution, then any Derivative Works that You distribute must
133
+ include a readable copy of the attribution notices contained
134
+ within such NOTICE file, excluding those notices that do not
135
+ pertain to any part of the Derivative Works, in at least one
136
+ of the following places: within a NOTICE text file distributed
137
+ as part of the Derivative Works; within the Source form or
138
+ documentation, if provided along with the Derivative Works; or,
139
+ within a display generated by the Derivative Works, if and
140
+ wherever such third-party notices normally appear. The contents
141
+ of the NOTICE file are for informational purposes only and
142
+ do not modify the License. You may add Your own attribution
143
+ notices within Derivative Works that You distribute, alongside
144
+ or as an addendum to the NOTICE text from the Work, provided
145
+ that such additional attribution notices cannot be construed
146
+ as modifying the License.
147
+
148
+ You may add Your own copyright statement to Your modifications and
149
+ may provide additional or different license terms and conditions
150
+ for use, reproduction, or distribution of Your modifications, or
151
+ for any such Derivative Works as a whole, provided Your use,
152
+ reproduction, and distribution of the Work otherwise complies with
153
+ the conditions stated in this License.
154
+
155
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
156
+ any Contribution intentionally submitted for inclusion in the Work
157
+ by You to the Licensor shall be under the terms and conditions of
158
+ this License, without any additional terms or conditions.
159
+ Notwithstanding the above, nothing herein shall supersede or modify
160
+ the terms of any separate license agreement you may have executed
161
+ with Licensor regarding such Contributions.
162
+
163
+ 6. Trademarks. This License does not grant permission to use the trade
164
+ names, trademarks, service marks, or product names of the Licensor,
165
+ except as required for reasonable and customary use in describing the
166
+ origin of the Work and reproducing the content of the NOTICE file.
167
+
168
+ 7. Disclaimer of Warranty. Unless required by applicable law or
169
+ agreed to in writing, Licensor provides the Work (and each
170
+ Contributor provides its Contributions) on an "AS IS" BASIS,
171
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
172
+ implied, including, without limitation, any warranties or conditions
173
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
174
+ PARTICULAR PURPOSE. You are solely responsible for determining the
175
+ appropriateness of using or redistributing the Work and assume any
176
+ risks associated with Your exercise of permissions under this License.
177
+
178
+ 8. Limitation of Liability. In no event and under no legal theory,
179
+ whether in tort (including negligence), contract, or otherwise,
180
+ unless required by applicable law (such as deliberate and grossly
181
+ negligent acts) or agreed to in writing, shall any Contributor be
182
+ liable to You for damages, including any direct, indirect, special,
183
+ incidental, or consequential damages of any character arising as a
184
+ result of this License or out of the use or inability to use the
185
+ Work (including but not limited to damages for loss of goodwill,
186
+ work stoppage, computer failure or malfunction, or any and all
187
+ other commercial damages or losses), even if such Contributor
188
+ has been advised of the possibility of such damages.
189
+
190
+ 9. Accepting Warranty or Additional Liability. While redistributing
191
+ the Work or Derivative Works thereof, You may choose to offer,
192
+ and charge a fee for, acceptance of support, warranty, indemnity,
193
+ or other liability obligations and/or rights consistent with this
194
+ License. However, in accepting such obligations, You may act only
195
+ on Your own behalf and on Your sole responsibility, not on behalf
196
+ of any other Contributor, and only if You agree to indemnify,
197
+ defend, and hold each Contributor harmless for any liability
198
+ incurred by, or claims asserted against, such Contributor by reason
199
+ of your accepting any such warranty or additional liability.
200
+
201
+ END OF TERMS AND CONDITIONS
202
+
203
+ APPENDIX: How to apply the Apache License to your work.
204
+
205
+ To apply the Apache License to your work, attach the following
206
+ boilerplate notice, with the fields enclosed by brackets "[]"
207
+ replaced with your own identifying information. (Don't include
208
+ the brackets!) The text should be enclosed in the appropriate
209
+ comment syntax for the file format. We also recommend that a
210
+ file or class name and description of purpose be included on the
211
+ same "printed page" as the copyright notice for easier
212
+ identification within third-party archives.
213
+
214
+ Copyright 2025 SK Telecom Co., Ltd.
215
+
216
+ Licensed under the Apache License, Version 2.0 (the "License");
217
+ you may not use this file except in compliance with the License.
218
+ You may obtain a copy of the License at
219
+
220
+ http://www.apache.org/licenses/LICENSE-2.0
221
+
222
+ Unless required by applicable law or agreed to in writing, software
223
+ distributed under the License is distributed on an "AS IS" BASIS,
224
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
225
+ See the License for the specific language governing permissions and
226
+ limitations under the License.
README.md CHANGED
@@ -1,3 +1,562 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ license_link: https://huggingface.co/skt/A.X-3.1/blob/main/LICENSE
4
+ language:
5
+ - en
6
+ - ko
7
+ pipeline_tag: text-generation
8
+ library_name: transformers
9
+ model_id: skt/A.X-3.1
10
+ developers: SKT AI Model Lab
11
+ model-index:
12
+ - name: A.X-3.1
13
+ results:
14
+ - task:
15
+ type: generate_until
16
+ name: mmlu
17
+ dataset:
18
+ name: mmlu (chat CoT)
19
+ type: hails/mmlu_no_train
20
+ metrics:
21
+ - type: exact_match
22
+ value: 75.1
23
+ name: exact_match
24
+ - task:
25
+ type: generate_until
26
+ name: kmmlu
27
+ dataset:
28
+ name: kmmlu (chat CoT)
29
+ type: HAERAE-HUB/KMMLU
30
+ metrics:
31
+ - type: exact_match
32
+ value: 69.2
33
+ name: exact_match
34
+ ---
35
+
36
+ # A.X 3.1
37
+
38
+ <div align="center">
39
+ <img src="./assets/A.X_from_scratch_logo_ko_4x3.png" alt="A.X Logo" width="300"/>
40
+ </div>
41
+ <p align="center"> <a href="https://huggingface.co/collections/skt/ax-3-686b288b3b05e1234f3f4c73">🤗 Models</a> | <a href="https://github.com/SKT-AI/A.X-3">🖥️ Github</a> </p>
42
+
43
+ ## A.X 3.1 Highlights
44
+
45
+ SK Telecom released **A.X 3.1** (pronounced "A dot X"), a large language model (LLM) optimized for Korean-language understanding and enterprise deployment, on July 24, 2025.
46
+ This sovereign AI model was developed entirely in-house by SKT, encompassing model architecture, data curation, and training, all carried out on SKT’s proprietary supercomputing infrastructure, TITAN.
47
+ The model was trained from scratch on a high-quality multilingual corpus comprising **2.1 trillion tokens**, with a primary focus on the Korean language.
48
+
49
+ - **Authentic Korean Sovereign AI**: A.X 3.1 was trained on a high-quality multilingual dataset—fully curated in-house—using SKT’s proprietary GPU infrastructure.
50
+ - **Highly Efficient Multilingual LLM**: A.X 3.1 demonstrates superior performance among Korean LLMs, despite its relatively compact training size of 2.1 trillion tokens.
51
+ - **Superior Korean Proficiency**: A.X 3.1 achieved a score of **69.2** on [KMMLU](https://huggingface.co/datasets/HAERAE-HUB/KMMLU), the leading benchmark for Korean-language evaluation and a Korean-specific adaptation of MMLU, outperforming other Korean-specific models.
52
+ - **Deep Korean Understanding**: A.X 3.1 obtained **77.4** on [CLIcK](https://huggingface.co/datasets/EunsuKim/CLIcK), a benchmark for Korean cultural and contextual comprehension, outperforming other open-source models.
53
+ - **Efficient Token Usage**: A.X 3.1 requires approximately 33% fewer tokens than GPT-4o to process equivalent Korean inputs, facilitating more cost-effective and computationally efficient inference.
54
+ - **Long-Context Handling**: A.X 3.1 supports up to **32,768 tokens** natively, and up to **131,072 tokens** by applying YaRN.
55
+
56
+
57
+ ## Core Technologies
58
+
59
+ A.X 3.1 represents **an efficient sovereign AI model**, developed end-to-end by SKT, encompassing model architecture, data curation, infrastructure deployment, and optimization.
60
+
61
+ ### Model Architecture Specs
62
+
63
+ <table><thead>
64
+ <tr>
65
+ <th>Model</th>
66
+ <th># Params</th>
67
+ <th># Layers</th>
68
+ <th># KV-Heads</th>
69
+ <th>Hidden Dim</th>
70
+ <th>FFN Dim</th>
71
+ </tr>
72
+ <tr>
73
+ <th>A.X 3.1</th>
74
+ <th>34B</th>
75
+ <th>48</th>
76
+ <th>8</th>
77
+ <th>8192</th>
78
+ <th>21824</th>
79
+ </tr>
80
+ </thead>
81
+ </table>
82
+
83
+ ### High-Quality Data Pipeline & Strategic Mixture
84
+
85
+ - We collected and curated a training dataset comprising 20 trillion tokens sourced from diverse domains.
86
+ - The entire dataset was processed through SKT’s proprietary data pipeline, incorporating synthetic data generation and comprehensive quality filtering.
87
+ - For training A.X 3.1, a total of **2.1 trillion tokens** were utilized, comprising a Korean-focused multilingual corpus.
88
+
89
+
90
+ ## Benchmark Results
91
+
92
+ ### Model Performance
93
+
94
+ <table>
95
+ <caption style="text-align:left; caption-side:bottom">* self-reported score</caption>
96
+ <thead>
97
+ <tr>
98
+ <th></th>
99
+ <th></th>
100
+ <th>A.X 3.1</th>
101
+ <th>EXAONE-3.5-32B</th>
102
+ <th>Kanana-flag-32.5B</th>
103
+ <th>Gemma-3-27B</th>
104
+ <th>Qwen2.5-32B</th>
105
+ </tr></thead>
106
+ <tbody>
107
+ <tr>
108
+ <td rowspan="5">Knowledge</td>
109
+ <td>KMMLU</td>
110
+ <td>69.73</td>
111
+ <td>57.17</td>
112
+ <td>64.19*</td>
113
+ <td>59.45</td>
114
+ <td>61.93</td>
115
+ </tr>
116
+ <tr>
117
+ <td>KMMLU-pro</td>
118
+ <td>54.89</td>
119
+ <td>45.39</td>
120
+ <td>-</td>
121
+ <td>50.43</td>
122
+ <td>52.34</td>
123
+ </tr>
124
+ <tr>
125
+ <td>KMMLU-redux</td>
126
+ <td>62.66</td>
127
+ <td>48.32</td>
128
+ <td>-</td>
129
+ <td>54.85</td>
130
+ <td>52.15</td>
131
+ </tr>
132
+ <tr>
133
+ <td>CLIcK (chat CoT)</td>
134
+ <td>77.09</td>
135
+ <td>69.42</td>
136
+ <td>-</td>
137
+ <td>71.03</td>
138
+ <td>68.17</td>
139
+ </tr>
140
+ <tr>
141
+ <td>MMLU</td>
142
+ <td>75.20</td>
143
+ <td>77.1</td>
144
+ <td>81.08*</td>
145
+ <td>82.35</td>
146
+ <td>83.4</td>
147
+ </tr>
148
+ <tr>
149
+ <td rowspan="2">General</td>
150
+ <td>Ko-MT-bench</td>
151
+ <td>83.06</td>
152
+ <td>80.19</td>
153
+ <td>80.58*</td>
154
+ <td>85.5</td>
155
+ <td>72.88</td>
156
+ </tr>
157
+ <tr>
158
+ <td>MT-bench</td>
159
+ <td>84.19</td>
160
+ <td>85.09</td>
161
+ <td>83.56*</td>
162
+ <td>84.38</td>
163
+ <td>87.31</td>
164
+ </tr>
165
+ <tr>
166
+ <td rowspan="2">IF</td>
167
+ <td>Ko-IFEval</td>
168
+ <td>75.29</td>
169
+ <td>68.67</td>
170
+ <td>-</td>
171
+ <td>74.4</td>
172
+ <td>73.24</td>
173
+ </tr>
174
+ <tr>
175
+ <td>IFEval</td>
176
+ <td>87.11</td>
177
+ <td>82.67</td>
178
+ <td>85.6*</td>
179
+ <td>82.45</td>
180
+ <td>82.27</td>
181
+ </tr>
182
+ <tr>
183
+ <td rowspan="2">Math<br> </td>
184
+ <td>HRM8K</td>
185
+ <td>45.53</td>
186
+ <td>36.3</td>
187
+ <td>-</td>
188
+ <td>48</td>
189
+ <td>41.29</td>
190
+ </tr>
191
+ <tr>
192
+ <td>MATH</td>
193
+ <td>75.40</td>
194
+ <td>61.64</td>
195
+ <td>57.82*</td>
196
+ <td>80.72</td>
197
+ <td>73.26</td>
198
+ </tr>
199
+ <tr>
200
+ <td rowspan="3">Code<br> <br> </td>
201
+ <td>HumanEval+</td>
202
+ <td>75.00</td>
203
+ <td>77.44</td>
204
+ <td>77.44*</td>
205
+ <td>78.66</td>
206
+ <td>82.32</td>
207
+ </tr>
208
+ <tr>
209
+ <td>MBPP+</td>
210
+ <td>70.90</td>
211
+ <td>65.87</td>
212
+ <td>69.84*</td>
213
+ <td>74.07</td>
214
+ <td>73.81</td>
215
+ </tr>
216
+ <tr>
217
+ <td>LiveCodeBench</td>
218
+ <td>23.34</td>
219
+ <td>17.2</td>
220
+ <td>-</td>
221
+ <td>30.55</td>
222
+ <td>26.9</td>
223
+ </tr>
224
+ </tbody></table>
225
+
226
+
227
+ ### Lightweight Model Performance
228
+
229
+ <table><thead>
230
+ <tr>
231
+ <th colspan="2">Benchmarks</th>
232
+ <th>A.X 3.1 Light</th>
233
+ <th>Kanana-1.5-8B</th>
234
+ <th>EXAONE-3.5-7.8B</th>
235
+ <th>Qwen2.5-7B</th>
236
+ <th>Qwen3-8B<br>(w/o reasoning)</th>
237
+ </tr></thead>
238
+ <tbody>
239
+ <tr>
240
+ <td rowspan="6">Knowledge</td>
241
+ <td>KMMLU</td>
242
+ <td>61.70</td>
243
+ <td>48.28</td>
244
+ <td>53.76</td>
245
+ <td>49.56</td>
246
+ <td>63.53</td>
247
+ </tr>
248
+ <tr>
249
+ <td>KMMLU-pro</td>
250
+ <td>45.54</td>
251
+ <td>37.63</td>
252
+ <td>40.11</td>
253
+ <td>38.87</td>
254
+ <td>50.71</td>
255
+ </tr>
256
+ <tr>
257
+ <td>KMMLU-redux</td>
258
+ <td>52.34</td>
259
+ <td>35.33</td>
260
+ <td>42.21</td>
261
+ <td>38.58</td>
262
+ <td>55.74</td>
263
+ </tr>
264
+ <tr>
265
+ <td>CLIcK</td>
266
+ <td>71.22</td>
267
+ <td>61.30</td>
268
+ <td>64.11</td>
269
+ <td>58.30</td>
270
+ <td>63.31</td>
271
+ </tr>
272
+ <tr>
273
+ <td>KoBALT</td>
274
+ <td>27.43</td>
275
+ <td>23.14</td>
276
+ <td>21.71</td>
277
+ <td>21.57</td>
278
+ <td>26.57</td>
279
+ </tr>
280
+ <tr>
281
+ <td>MMLU</td>
282
+ <td>66.95</td>
283
+ <td>68.82</td>
284
+ <td>72.20</td>
285
+ <td>75.40</td>
286
+ <td>82.89</td>
287
+ </tr>
288
+ <tr>
289
+ <td rowspan="2">General</td>
290
+ <td>Ko-MT-Bench</td>
291
+ <td>78.56</td>
292
+ <td>76.30</td>
293
+ <td>81.06</td>
294
+ <td>61.31</td>
295
+ <td>64.06</td>
296
+ </tr>
297
+ <tr>
298
+ <td>MT-Bench</td>
299
+ <td>74.38</td>
300
+ <td>77.60</td>
301
+ <td>83.50</td>
302
+ <td>79.37</td>
303
+ <td>65.69</td>
304
+ </tr>
305
+ <tr>
306
+ <td rowspan="2">Instruction<br>Following</td>
307
+ <td>Ko-IFEval</td>
308
+ <td>70.04</td>
309
+ <td>69.96</td>
310
+ <td>65.01</td>
311
+ <td>60.73</td>
312
+ <td>73.39</td>
313
+ </tr>
314
+ <tr>
315
+ <td>IFEval</td>
316
+ <td>79.86</td>
317
+ <td>80.11</td>
318
+ <td>82.61</td>
319
+ <td>76.73</td>
320
+ <td>85.38</td>
321
+ </tr>
322
+ <tr>
323
+ <td rowspan="2">Math</td>
324
+ <td>HRM8K</td>
325
+ <td>41.70</td>
326
+ <td>30.87</td>
327
+ <td>31.88</td>
328
+ <td>35.13</td>
329
+ <td>52.50</td>
330
+ </tr>
331
+ <tr>
332
+ <td>MATH</td>
333
+ <td>70.14</td>
334
+ <td>59.28</td>
335
+ <td>63.20</td>
336
+ <td>65.58</td>
337
+ <td>71.48</td>
338
+ </tr>
339
+ <tr>
340
+ <td rowspan="2">Code<br></td>
341
+ <td>HumanEval+</td>
342
+ <td>73.78</td>
343
+ <td>76.83</td>
344
+ <td>76.83</td>
345
+ <td>74.39</td>
346
+ <td>77.44</td>
347
+ </tr>
348
+ <tr>
349
+ <td>MBPP+</td>
350
+ <td>61.64</td>
351
+ <td>67.99</td>
352
+ <td>64.29</td>
353
+ <td>68.50</td>
354
+ <td>62.17</td>
355
+ </tr>
356
+ </tbody></table>
357
+
358
+ ## 🚀 Quickstart
359
+
360
+ ### with HuggingFace Transformers
361
+
362
+ - `transformers>=4.46.0` or the latest version is required to use `skt/A.X-3.1`
363
+ ```bash
364
+ pip install "transformers>=4.46.0"
365
+ ```
366
+
367
+ #### Example Usage
368
+
369
+ ```python
370
+ import torch
371
+ from transformers import AutoModelForCausalLM, AutoTokenizer
372
+
373
+ model_name = "skt/A.X-3.1"
374
+ model = AutoModelForCausalLM.from_pretrained(
375
+ model_name,
376
+ torch_dtype=torch.bfloat16,
377
+ device_map="auto",
378
+ )
379
+ model.eval()
380
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
381
+
382
+ messages = [
383
+ {"role": "system", "content": "당신은 사용자가 제공하는 영어 문장들을 한국어로 번역하는 AI 전문가입니다."},
384
+ {"role": "user", "content": "The first human went into space and orbited the Earth on April 12, 1961."},
385
+ ]
386
+ input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
387
+
388
+ with torch.no_grad():
389
+ output = model.generate(
390
+ input_ids,
391
+ max_new_tokens=128,
392
+ do_sample=False,
393
+ )
394
+
395
+ len_input_prompt = len(input_ids[0])
396
+ response = tokenizer.decode(output[0][len_input_prompt:], skip_special_tokens=True)
397
+ print(response)
398
+ # Output:
399
+ # 우주에서 인간이 처음으로 지구 궤도를 돈 날은 1961년 4월 12일입니다.
400
+ ```
401
+
402
+ ### with vLLM
403
+
404
+ - `vllm>=v0.6.4.post1` or the latest version is required to use the tool-use feature
405
+ ```bash
406
+ pip install "vllm>=v0.6.4.post1"
407
+ # if you don't want to enable the tool-use feature, just comment out the vLLM option below
408
+ VLLM_OPTION="--enable-auto-tool-choice --tool-call-parser hermes"
409
+ vllm serve skt/A.X-3.1 $VLLM_OPTION
410
+ ```
411
+
412
+ #### Example Usage
413
+
414
+ ```python
415
+ from openai import OpenAI
416
+
417
+ def call(messages, model):
418
+ completion = client.chat.completions.create(
419
+ model=model,
420
+ messages=messages,
421
+ )
422
+ print(completion.choices[0].message)
423
+
424
+ client = OpenAI(
425
+ base_url="http://localhost:8000/v1",
426
+ api_key="api_key"
427
+ )
428
+ model = "skt/A.X-3.1"
429
+ messages = [{"role": "user", "content": "에어컨 여름철 적정 온도는? 한줄로 답변해줘"}]
430
+ call(messages, model)
431
+ # Output:
432
+ # 여름철 에어컨 적정 온도는 24~26도입니다.
433
+
434
+ messages = [{"role": "user", "content": "What is the appropriate temperature for air conditioning in summer? Respond in a single sentence."}]
435
+ call(messages, model)
436
+ # Output:
437
+ # The appropriate temperature for air conditioning in summer is around 78°F (26°C).
438
+ ```
439
+
440
+ #### Examples for tool-use
441
+ ```python
442
+ from openai import OpenAI
443
+
444
+
445
+ def call(messages, model):
446
+ completion = client.chat.completions.create(
447
+ model=model,
448
+ messages=messages,
449
+ tools=tools
450
+ )
451
+ print(completion.choices[0].message)
452
+
453
+
454
+ client = OpenAI(
455
+ base_url="http://localhost:8000/v1",
456
+ api_key="api_key"
457
+ )
458
+ model = "skt/A.X-3.1"
459
+
460
+ calculate_discount = {
461
+ "type": "function",
462
+ "function": {
463
+ "name": "calculate_discount",
464
+ "description": "원가격과 할인율(퍼센트 단위)을 입력받아 할인된 가격을 계산한다.",
465
+ "parameters": {
466
+ "type": "object",
467
+ "properties": {
468
+ "original_price": {
469
+ "type": "number",
470
+ "description": "상품의 원래 가격"
471
+ },
472
+ "discount_percentage": {
473
+ "type": "number",
474
+ "description": "적용할 할인율"
475
+ }
476
+ },
477
+ "required": ["original_price", "discount_percentage"]
478
+ }
479
+ }
480
+ }
481
+ get_exchange_rate = {
482
+ "type": "function",
483
+ "function": {
484
+ "name": "get_exchange_rate",
485
+ "description": "두 통화 간의 환율을 가져온다.",
486
+ "parameters": {
487
+ "type": "object",
488
+ "properties": {
489
+ "base_currency": {
490
+ "type": "string",
491
+ "description": "The currency to convert from."
492
+ },
493
+ "target_currency": {
494
+ "type": "string",
495
+ "description": "The currency to convert to."
496
+ }
497
+ },
498
+ "required": ["base_currency", "target_currency"]
499
+ }
500
+ }
501
+ }
502
+ tools = [calculate_discount, get_exchange_rate]
503
+
504
+ ### Slot filling ###
505
+ messages = [{"role": "user", "content": "우리가 뭘 사야되는데 원가가 57600원인데 직원할인 받으면 얼마야?"}]
506
+ call(messages, model)
507
+ # Output:
508
+ # ChatCompletionMessage(content='직원 할인율이 몇 퍼센트인지 알려주신다면 할인된 가격을 계산할 수 있습니다. 할인율이 몇 퍼센트인지 알려주실 수 있나요?', role='assistant', tool_calls=[])
509
+
510
+
511
+ ### Function calling ###
512
+ messages = [
513
+ {"role": "user", "content": "우리가 뭘 사야되는데 원가가 57600원인데 직원할인 받으면 얼마야?"},
514
+ {"role": "assistant", "content": "직원 할인율이 몇 퍼센트인지 알려주신다면 할인된 가격을 계산할 수 있습니다. 할인율이 몇 퍼센트인지 알려주실 수 있나요?"},
515
+ {"role": "user", "content": "15% 할인 받을 수 있어."},
516
+ ]
517
+ call(messages, model)
518
+ # Output:
519
+ # ChatCompletionMessage(content=None, role='assistant', tool_calls=[ChatCompletionMessageToolCall(id='chatcmpl-tool-cb9e827f752d4725abc94377223b2b0f', function=Function(arguments='{"original_price": 57600, "discount_percentage": 15}', name='calculate_discount'), type='function')])
520
+
521
+
522
+ ### Completion ###
523
+ messages = [
524
+ {"role": "user", "content": "우리가 뭘 사야되는데 원가가 57600원인데 직원할인 받으면 얼마야?"},
525
+ {"role": "assistant", "content": "직원 할인율이 몇 퍼센트인지 알려주신다면 할인된 가격을 계산할 수 있습니다. 할인율이 몇 퍼센트인지 알려주실 수 있나요?"},
526
+ {"role": "user", "content": "15% 할인 받을 수 있어."},
527
+ {"role": "tool", "tool_call_id": "random_id", "name": "calculate_discount", "content": "{\"original_price\": 57600, \"discount_percentage\": 15, \"discounted_price\": 48960.0}"}
528
+ ]
529
+ call(messages, model)
530
+ # Output:
531
+ # ChatCompletionMessage(content='직원 할인을 받으면 57600원의 상품은 15% 할인을 받아 48960원이 됩니다.', role='assistant', tool_calls=[])
532
+ ```
533
+
534
+ ### Extend supported token length
535
+
536
+ The `config.json` file of A.X 3.1 uploaded to HuggingFace is configured for a maximum token length of 32,768. You can handle up to 131,072 tokens by changing the `rope_scaling` field in the `config.json` file to the following:
537
+
538
+ ```
539
+ "rope_scaling": {
540
+ "type": "yarn",
541
+ "factor": 4.0,
542
+ "original_max_position_embeddings": 32768
543
+ },
544
+ ```
545
+
546
+ ## License
547
+
548
+ The `A.X 3.1` model is licensed under `Apache License 2.0`.
549
+
550
+ ## Citation
551
+ ```
552
+ @article{SKTAdotX3.1,
553
+ title={A.X 3.1},
554
+ author={SKT AI Model Lab},
555
+ year={2025},
556
+ url={https://huggingface.co/skt/A.X-3.1}
557
+ }
558
+ ```
559
+
560
+ ## Contact
561
+
562
+ - Business & Partnership Contact: [[email protected]]([email protected])
assets/A.X_from_scratch_logo_ko_4x3.png ADDED

Git LFS Details

  • SHA256: 5e41e606567955055a7085c604d922728c926412eafe970faf1d1ec6a62f78b4
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 0,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 8192,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 21824,
14
+ "max_position_embeddings": 32768,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 64,
18
+ "num_hidden_layers": 48,
19
+ "num_key_value_heads": 8,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 500000,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.3",
27
+ "use_cache": false,
28
+ "vocab_size": 102400
29
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "eos_token_id": 27,
4
+ "max_new_tokens": 28000,
5
+ "pad_token_id": 1,
6
+ "transformers_version": "4.51.3"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af27f1de3587cf2f7f6adb1a57ec8d195945c34d9b4a65d54b6f17370a0638a
3
+ size 4729145992
model-00002-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccfca12db8238a145e00162c90a3cc2828d48c10f426a68aedd822eaadff3674
3
+ size 4839279920
model-00003-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8840073b04df6688afb127dcf8c11e52cb01eea30e17ccfdfffd397ef8efdd6c
3
+ size 4783738744
model-00004-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e916002f5ddb6427605c7e61bff22e01d79ef84175c8e469f0041b8f46311fdb
3
+ size 4839279944
model-00005-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736d142f43c2f4d32b7bcf5445264d1e6103fe81b5dd17b73a3c36efab0214f7
3
+ size 4783738776
model-00006-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07baf531c43e17f6eb107636d73047ff7a2989bc11456ffe0d54232c93526c69
3
+ size 4839279944
model-00007-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:259e3d600b1f9f25bbbfbb1721707814a1be2b4dde0e9b4d4bea29363bdee7eb
3
+ size 4783738776
model-00008-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe8f2779fa30814a4cbb7534117b447338ae15bf48a2cfdeda8d172952f9656
3
+ size 4839279944
model-00009-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee8f88de5b09e752a5b4d64eb4bfb09bf5dcbc3d911a3cdae0177c19bcc70ea
3
+ size 4783738776
model-00010-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f62701a5e81ffd23534df25bffbdb78ca3bc33970dd40e6b51ca5b7ad4c40f1
3
+ size 4839279944
model-00011-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9ad98dcf1bb9656267deaba2781aba85f7b804511dd45d87b0c4f560324637
3
+ size 4783738776
model-00012-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eab76a1631f34db3c7c1ce6e3c1ca0f6cb3139b646054d456547462ccf171260
3
+ size 4839279944
model-00013-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c837805765a9d070695869106cbde43aefa7ce00972ec082a0e93d2c8b60a53e
3
+ size 4783738776
model-00014-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dac245c497ec34841d1665a6500bfce7e44c74f0bbc89c4b69a7e0f24ec7406
3
+ size 4839279944
model-00015-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c1c4c035eff40ceaa44e759eaa296ed63139b321103c5a7e08b0a3252841fe
3
+ size 2035335736
model.safetensors.index.json ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 69341822976
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00015-of-00015.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00015.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00004-of-00015.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00004-of-00015.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00005-of-00015.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00005-of-00015.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00005-of-00015.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00005-of-00015.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00006-of-00015.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00006-of-00015.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00006-of-00015.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00007-of-00015.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00015.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00007-of-00015.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00007-of-00015.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00007-of-00015.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00008-of-00015.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00008-of-00015.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00008-of-00015.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00009-of-00015.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00009-of-00015.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00009-of-00015.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00009-of-00015.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00010-of-00015.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00010-of-00015.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
242
+ "model.layers.32.input_layernorm.weight": "model-00010-of-00015.safetensors",
243
+ "model.layers.32.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
244
+ "model.layers.32.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
245
+ "model.layers.32.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
246
+ "model.layers.32.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
247
+ "model.layers.32.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
248
+ "model.layers.32.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
249
+ "model.layers.32.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
250
+ "model.layers.32.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
251
+ "model.layers.33.input_layernorm.weight": "model-00011-of-00015.safetensors",
252
+ "model.layers.33.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
253
+ "model.layers.33.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
254
+ "model.layers.33.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
255
+ "model.layers.33.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
256
+ "model.layers.33.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
257
+ "model.layers.33.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
258
+ "model.layers.33.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
259
+ "model.layers.33.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
260
+ "model.layers.34.input_layernorm.weight": "model-00011-of-00015.safetensors",
261
+ "model.layers.34.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
262
+ "model.layers.34.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
263
+ "model.layers.34.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
264
+ "model.layers.34.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
265
+ "model.layers.34.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
266
+ "model.layers.34.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
267
+ "model.layers.34.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
268
+ "model.layers.34.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
269
+ "model.layers.35.input_layernorm.weight": "model-00011-of-00015.safetensors",
270
+ "model.layers.35.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
271
+ "model.layers.35.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
272
+ "model.layers.35.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
273
+ "model.layers.35.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
274
+ "model.layers.35.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
275
+ "model.layers.35.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
276
+ "model.layers.35.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
277
+ "model.layers.35.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
278
+ "model.layers.36.input_layernorm.weight": "model-00011-of-00015.safetensors",
279
+ "model.layers.36.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
280
+ "model.layers.36.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
281
+ "model.layers.36.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
282
+ "model.layers.36.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
283
+ "model.layers.36.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
284
+ "model.layers.36.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
285
+ "model.layers.36.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
286
+ "model.layers.36.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
287
+ "model.layers.37.input_layernorm.weight": "model-00012-of-00015.safetensors",
288
+ "model.layers.37.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
289
+ "model.layers.37.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
290
+ "model.layers.37.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
291
+ "model.layers.37.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
292
+ "model.layers.37.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
293
+ "model.layers.37.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
294
+ "model.layers.37.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
295
+ "model.layers.37.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
296
+ "model.layers.38.input_layernorm.weight": "model-00012-of-00015.safetensors",
297
+ "model.layers.38.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
298
+ "model.layers.38.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
299
+ "model.layers.38.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
300
+ "model.layers.38.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
301
+ "model.layers.38.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
302
+ "model.layers.38.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
303
+ "model.layers.38.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
304
+ "model.layers.38.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
305
+ "model.layers.39.input_layernorm.weight": "model-00012-of-00015.safetensors",
306
+ "model.layers.39.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
307
+ "model.layers.39.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
308
+ "model.layers.39.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
309
+ "model.layers.39.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
310
+ "model.layers.39.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
311
+ "model.layers.39.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
312
+ "model.layers.39.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
313
+ "model.layers.39.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
314
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
315
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
316
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
317
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
318
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
319
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
320
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
321
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
322
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
323
+ "model.layers.40.input_layernorm.weight": "model-00013-of-00015.safetensors",
324
+ "model.layers.40.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
325
+ "model.layers.40.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
326
+ "model.layers.40.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
327
+ "model.layers.40.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
328
+ "model.layers.40.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
329
+ "model.layers.40.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
330
+ "model.layers.40.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
331
+ "model.layers.40.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
332
+ "model.layers.41.input_layernorm.weight": "model-00013-of-00015.safetensors",
333
+ "model.layers.41.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
334
+ "model.layers.41.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
335
+ "model.layers.41.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
336
+ "model.layers.41.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
337
+ "model.layers.41.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
338
+ "model.layers.41.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
339
+ "model.layers.41.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
340
+ "model.layers.41.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
341
+ "model.layers.42.input_layernorm.weight": "model-00013-of-00015.safetensors",
342
+ "model.layers.42.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
343
+ "model.layers.42.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
344
+ "model.layers.42.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
345
+ "model.layers.42.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
346
+ "model.layers.42.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
347
+ "model.layers.42.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
348
+ "model.layers.42.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
349
+ "model.layers.42.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
350
+ "model.layers.43.input_layernorm.weight": "model-00013-of-00015.safetensors",
351
+ "model.layers.43.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
352
+ "model.layers.43.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
353
+ "model.layers.43.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
354
+ "model.layers.43.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
355
+ "model.layers.43.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
356
+ "model.layers.43.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
357
+ "model.layers.43.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
358
+ "model.layers.43.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
359
+ "model.layers.44.input_layernorm.weight": "model-00014-of-00015.safetensors",
360
+ "model.layers.44.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
361
+ "model.layers.44.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
362
+ "model.layers.44.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
363
+ "model.layers.44.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
364
+ "model.layers.44.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
365
+ "model.layers.44.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
366
+ "model.layers.44.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
367
+ "model.layers.44.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
368
+ "model.layers.45.input_layernorm.weight": "model-00014-of-00015.safetensors",
369
+ "model.layers.45.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
370
+ "model.layers.45.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
371
+ "model.layers.45.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
372
+ "model.layers.45.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
373
+ "model.layers.45.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
374
+ "model.layers.45.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
375
+ "model.layers.45.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
376
+ "model.layers.45.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
377
+ "model.layers.46.input_layernorm.weight": "model-00014-of-00015.safetensors",
378
+ "model.layers.46.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
379
+ "model.layers.46.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
380
+ "model.layers.46.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
381
+ "model.layers.46.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
382
+ "model.layers.46.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
383
+ "model.layers.46.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
384
+ "model.layers.46.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
385
+ "model.layers.46.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
386
+ "model.layers.47.input_layernorm.weight": "model-00015-of-00015.safetensors",
387
+ "model.layers.47.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
388
+ "model.layers.47.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
389
+ "model.layers.47.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
390
+ "model.layers.47.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
391
+ "model.layers.47.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
392
+ "model.layers.47.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
393
+ "model.layers.47.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
394
+ "model.layers.47.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
395
+ "model.layers.5.input_layernorm.weight": "model-00003-of-00015.safetensors",
396
+ "model.layers.5.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
397
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
398
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
399
+ "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
400
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
401
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
402
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
403
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
404
+ "model.layers.6.input_layernorm.weight": "model-00003-of-00015.safetensors",
405
+ "model.layers.6.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
406
+ "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
407
+ "model.layers.6.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
408
+ "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
409
+ "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
410
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
411
+ "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
412
+ "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
413
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
414
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
415
+ "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
416
+ "model.layers.7.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
417
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
418
+ "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
419
+ "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
420
+ "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
421
+ "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
422
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
423
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
424
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
425
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
426
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
427
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
428
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
429
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
430
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
431
+ "model.layers.9.input_layernorm.weight": "model-00004-of-00015.safetensors",
432
+ "model.layers.9.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
433
+ "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
434
+ "model.layers.9.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
435
+ "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
436
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
437
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
438
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
439
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
440
+ "model.norm.weight": "model-00015-of-00015.safetensors"
441
+ }
442
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "<|pad|>",
5
+ "<|unk|>",
6
+ "<|sep|>",
7
+ "<|mask|>",
8
+ "<|cls|>",
9
+ "<|image|>",
10
+ "<|audio|>",
11
+ "<|user|>",
12
+ "<|system|>",
13
+ "<|assistant|>",
14
+ "<|extra_id_0|>",
15
+ "<|extra_id_1|>",
16
+ "<|extra_id_2|>",
17
+ "<|extra_id_3|>",
18
+ "<|extra_id_4|>",
19
+ "<|extra_id_5|>",
20
+ "<|extra_id_6|>",
21
+ "<|extra_id_7|>",
22
+ "<|extra_id_8|>",
23
+ "<|extra_id_9|>",
24
+ "<|extra_id_10|>",
25
+ "<|extra_id_13|>",
26
+ "<|im_start|>",
27
+ "<|im_sep|>",
28
+ "<|im_end|>",
29
+ "<|resident_reg|>",
30
+ "<|foreigner_reg|>",
31
+ "<|business_reg|>",
32
+ "<|credit_card|>",
33
+ "<|passport|>",
34
+ "<|driver_license|>",
35
+ "<|telephone|>",
36
+ "<|health_insurance|>",
37
+ "<|bank_account|>"
38
+ ],
39
+ "bos_token": {
40
+ "content": "<|endoftext|>",
41
+ "lstrip": false,
42
+ "normalized": false,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "cls_token": {
47
+ "content": "<|cls|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "eos_token": "<|im_end|>",
54
+ "mask_token": {
55
+ "content": "<|mask|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false
60
+ },
61
+ "pad_token": "<|pad|>",
62
+ "sep_token": {
63
+ "content": "<|sep|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false
68
+ },
69
+ "unk_token": {
70
+ "content": "<|unk|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false
75
+ }
76
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<|pad|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<|unk|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<|sep|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "4": {
38
+ "content": "<|mask|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "5": {
46
+ "content": "<|cls|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "6": {
54
+ "content": "<|image|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "7": {
62
+ "content": "<|audio|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "8": {
70
+ "content": "<|user|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "9": {
78
+ "content": "<|system|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "10": {
86
+ "content": "<|assistant|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "11": {
94
+ "content": "<|extra_id_0|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "12": {
102
+ "content": "<|extra_id_1|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "13": {
110
+ "content": "<|extra_id_2|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "14": {
118
+ "content": "<|extra_id_3|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "15": {
126
+ "content": "<|extra_id_4|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": true
132
+ },
133
+ "16": {
134
+ "content": "<|extra_id_5|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": true
140
+ },
141
+ "17": {
142
+ "content": "<|extra_id_6|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": true
148
+ },
149
+ "18": {
150
+ "content": "<|extra_id_7|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "19": {
158
+ "content": "<|extra_id_8|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": true
164
+ },
165
+ "20": {
166
+ "content": "<|extra_id_9|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": true
172
+ },
173
+ "21": {
174
+ "content": "<|extra_id_10|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": true
180
+ },
181
+ "22": {
182
+ "content": "</think>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "23": {
190
+ "content": "<think>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "24": {
198
+ "content": "<|extra_id_13|>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": true
204
+ },
205
+ "25": {
206
+ "content": "<|im_start|>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": true
212
+ },
213
+ "26": {
214
+ "content": "<|im_sep|>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "27": {
222
+ "content": "<|im_end|>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "28": {
230
+ "content": "<|resident_reg|>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "29": {
238
+ "content": "<|foreigner_reg|>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "30": {
246
+ "content": "<|business_reg|>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "31": {
254
+ "content": "<|credit_card|>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "32": {
262
+ "content": "<|passport|>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "33": {
270
+ "content": "<|driver_license|>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "34": {
278
+ "content": "<|telephone|>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "35": {
286
+ "content": "<|health_insurance|>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ },
293
+ "36": {
294
+ "content": "<|bank_account|>",
295
+ "lstrip": false,
296
+ "normalized": false,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": true
300
+ },
301
+ "37": {
302
+ "content": "</tool_output>",
303
+ "lstrip": false,
304
+ "normalized": false,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": false
308
+ },
309
+ "38": {
310
+ "content": "<tool_output>",
311
+ "lstrip": false,
312
+ "normalized": false,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": false
316
+ },
317
+ "39": {
318
+ "content": "</tool_call>",
319
+ "lstrip": false,
320
+ "normalized": false,
321
+ "rstrip": false,
322
+ "single_word": false,
323
+ "special": false
324
+ },
325
+ "40": {
326
+ "content": "<tool_call>",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false,
331
+ "special": false
332
+ }
333
+ },
334
+ "additional_special_tokens": [
335
+ "<|endoftext|>",
336
+ "<|pad|>",
337
+ "<|unk|>",
338
+ "<|sep|>",
339
+ "<|mask|>",
340
+ "<|cls|>",
341
+ "<|image|>",
342
+ "<|audio|>",
343
+ "<|user|>",
344
+ "<|system|>",
345
+ "<|assistant|>",
346
+ "<|extra_id_0|>",
347
+ "<|extra_id_1|>",
348
+ "<|extra_id_2|>",
349
+ "<|extra_id_3|>",
350
+ "<|extra_id_4|>",
351
+ "<|extra_id_5|>",
352
+ "<|extra_id_6|>",
353
+ "<|extra_id_7|>",
354
+ "<|extra_id_8|>",
355
+ "<|extra_id_9|>",
356
+ "<|extra_id_10|>",
357
+ "<|extra_id_13|>",
358
+ "<|im_start|>",
359
+ "<|im_sep|>",
360
+ "<|im_end|>",
361
+ "<|resident_reg|>",
362
+ "<|foreigner_reg|>",
363
+ "<|business_reg|>",
364
+ "<|credit_card|>",
365
+ "<|passport|>",
366
+ "<|driver_license|>",
367
+ "<|telephone|>",
368
+ "<|health_insurance|>",
369
+ "<|bank_account|>"
370
+ ],
371
+ "bos_token": "<|endoftext|>",
372
+ "chat_template": "{%- if tools is iterable and tools | length > 0 %}\n {{- '<|im_start|><|system|>'}}\n {{- '당신은 도구 호출 기능을 갖춘 유용한 도우미입니다. 사용자의 요청을 처리하기 위해서 필요한 도구가 주어진 목록에 있는 경우 도구 호출로 응답하세요.\n필요한 도구가 목록에 없는 경우에는 도구 호출 없이 사용자가 요구한 정보를 제공하세요.\n필요한 도구가 목록에 있지만 해당 도구를 호출하는데 필요한 argument 정보가 부족한 경우 해당 정보를 사용자에게 요청하세요.\n사용자의 요청을 처리하기 위해 여러번 도구를 호출할 수 있어야 합니다.\n도구 호출 이후 도구 실행 결과를 입력으로 받으면 해당 결과를 활용하여 답변을 생성하세요.\n\n다음은 접근할 수 있는 도구들의 목록 입니다:\n<tools>\n'}}\n {%- for t in tools %}\n {{- t | tojson }}\n {{- '\n' }}\n {%- endfor %}\n {{- '</tools>' }}\n {{- '\n\n도구를 호출하려면 아래의 JSON으로 응답하세요.\n도구 호출 형식: <tool_call>{\"name\": 도구 이름, \"arguments\": dictionary 형태의 도구 인자값}</tool_call>' }}\n {{- '<|im_end|>' }}\n {%- endif %}\n \n {%- for message in messages %}\n {%- if message.role == 'system' %}\n {{- '<|im_start|><|system|>' + message.content + '<|im_end|>'}}\n {%- elif message.role == 'user' %}\n {{- '<|im_start|><|user|>' + message.content + '<|im_end|>'}}\n {%- elif message.role == 'assistant' %}\n {{- '<|im_start|><|assistant|>'}}\n {%- set content = '' %}\n {%- if message.content is defined and message.content is not none %}\n {%- set content = message.content %}\n {%- endif %}\n \n {%- if add_generation_prompt and not (message.reasoning_content is defined and message.reasoning_content is not none) %}\n {%- if '</think>' in content %}\n {%- set content = content.split('</think>'.strip())[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n \n {{- content}}\n {%- if message.tool_calls is defined and message.tool_calls is not none %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>' }}\n {{- '{' }}\n {{- '\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\"' }}\n {%- if tool_call.arguments is defined %}\n {{- ', ' }}\n {{- '\"arguments\": ' }}\n {{- tool_call.arguments|tojson }}\n {%- endif %}\n {{- '}' }}\n {{- '</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>'}}\n \n {%- elif message.role == 
'tool' %}\n {{- '<|im_start|><|extra_id_13|><tool_output>' + message.content + '</tool_output><|im_end|>'}}\n {%- endif %}\n {%- endfor %}\n \n {%- if add_generation_prompt %}\n {{- '<|im_start|><|assistant|>' }}\n {%- endif %}",
373
+ "clean_up_tokenization_spaces": true,
374
+ "cls_token": "<|cls|>",
375
+ "eod_token": "<|endoftext|>",
376
+ "eos_token": "<|im_end|>",
377
+ "errors": "replace",
378
+ "extra_special_tokens": {},
379
+ "mask_token": "<|mask|>",
380
+ "max_length": 7680,
381
+ "model_max_length": 32768,
382
+ "pad_token": "<|pad|>",
383
+ "sep_token": "<|sep|>",
384
+ "tokenizer_class": "GPT2Tokenizer",
385
+ "unk_token": "<|unk|>",
386
+ "vocab_size": 102400
387
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff