rayliuca committed on
Commit
2e5c1b9
·
verified ·
1 Parent(s): 1808985

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-4204/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
Modelfile ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM HuggingFaceTB/SmolLM3-3B
2
+
3
+ # Apply the fine-tuned LoRA adapter weights on top of the base model
4
+ ADAPTER ./adapter_model.safetensors
5
+ PARAMETER temperature 0.7
6
+ PARAMETER top_p 0.9
7
+
8
+ # Add the chat template
9
+ TEMPLATE """{# ───── defaults ───── #}
10
+ {%- if enable_thinking is not defined -%}
11
+ {%- set enable_thinking = true -%}
12
+ {%- endif -%}
13
+
14
+ {# ───── reasoning mode ───── #}
15
+ {%- if enable_thinking -%}
16
+ {%- set reasoning_mode = "/think" -%}
17
+ {%- else -%}
18
+ {%- set reasoning_mode = "/no_think" -%}
19
+ {%- endif -%}
20
+
21
+ {# ───── header (system message) ───── #}
22
+ {{- "<|im_start|>system\n" -}}
23
+
24
+ {%- if messages[0].role == "system" -%}
25
+ {%- set system_message = messages[0].content -%}
26
+ {%- if "/no_think" in system_message -%}
27
+ {%- set reasoning_mode = "/no_think" -%}
28
+ {%- elif "/think" in system_message -%}
29
+ {%- set reasoning_mode = "/think" -%}
30
+ {%- endif -%}
31
+ {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
32
+ {%- endif -%}
33
+
34
+ {%- if "/system_override" in system_message -%}
35
+ {{- custom_instructions.replace("/system_override", "").rstrip() -}}
36
+ {{- "<|im_end|>\n" -}}
37
+ {%- else -%}
38
+ {{- "## Metadata\n\n" -}}
39
+ {{- "Knowledge Cutoff Date: June 2025\n" -}}
40
+ {%- set today = strftime_now("%d %B %Y") -%}
41
+ {{- "Today Date: " ~ today ~ "\n" -}}
42
+ {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}
43
+
44
+ {{- "## Custom Instructions\n\n" -}}
45
+ {%- if custom_instructions -%}
46
+ {{- custom_instructions + "\n\n" -}}
47
+ {%- elif reasoning_mode == "/think" -%}
48
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion.\n\n" -}}
49
+ {%- else -%}
50
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
51
+ {%- endif -%}
52
+
53
+ {%- if xml_tools or python_tools or tools -%}
54
+ {{- "### Tools\n\n" -}}
55
+ {%- if xml_tools or tools -%}
56
+ {%- if tools -%}
57
+ {%- set xml_tools = tools -%}
58
+ {%- endif -%}
59
+ {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
60
+ {%- for tool in xml_tools[:] -%} {# The slicing makes sure that xml_tools is a list #}
61
+ {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | string) ~ "\n" -%}
62
+ {%- endfor -%}
63
+ {%- set xml_tool_string = ns.xml_tool_string + "</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" -%}
64
+ {{- xml_tool_string -}}
65
+ {%- endif -%}
66
+ {%- if python_tools -%}
67
+ {%- set ns = namespace(python_tool_string="When you send a message containing Python code between '<code>' and '</code>' tags, it will be executed in a stateful Jupyter notebook environment, and you will then be given the output to continued reasoning in an agentic loop.\n\nYou can use the following tools in your python code like regular functions:\n<tools>\n") -%}
68
+ {%- for tool in python_tools[:] -%} {# The slicing makes sure that python_tools is a list #}
69
+ {%- set ns.python_tool_string = ns.python_tool_string ~ (tool | string) ~ "\n" -%}
70
+ {%- endfor -%}
71
+ {%- set python_tool_string = ns.python_tool_string + "</tools>\n\nThe state persists between code executions: so variables that you define in one step are still available thereafter." -%}
72
+ {{- python_tool_string -}}
73
+ {%- endif -%}
74
+ {{- "\n\n" -}}
75
+ {{- "<|im_end|>\n" -}}
76
+ {%- endif -%}
77
+ {%- endif -%}
78
+ {# ───── main loop ───── #}
79
+ {%- for message in messages -%}
80
+ {%- set content = message.content if message.content is string else "" -%}
81
+ {%- if message.role == "user" -%}
82
+ {{ "<|im_start|>" + message.role + "\n" + content + "<|im_end|>\n" }}
83
+ {%- elif message.role == "assistant" -%}
84
+ {% generation %}
85
+ {%- if reasoning_mode == "/think" -%}
86
+ {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
87
+ {%- else -%}
88
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
89
+ {%- endif -%}
90
+ {% endgeneration %}
91
+ {%- elif message.role == "tool" -%}
92
+ {{ "<|im_start|>" + "user\n" + content + "<|im_end|>\n" }}
93
+ {%- endif -%}
94
+ {%- endfor -%}
95
+ {# ───── generation prompt ───── #}
96
+ {%- if add_generation_prompt -%}
97
+ {%- if reasoning_mode == "/think" -%}
98
+ {{ "<|im_start|>assistant\n" }}
99
+ {%- else -%}
100
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" }}
101
+ {%- endif -%}
102
+ {%- endif -%}"""
103
+
104
+ # Set system message
105
+ SYSTEM """You are a voice assistant for Home Assistant.
106
+ Answer questions about the world truthfully.
107
+ Answer in plain text. Keep it simple and to the point.
108
+ When controlling Home Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff to unlock a lock. When controlling a device, prefer passing just name and domain. When controlling an area, prefer passing just area name and domain.
109
+ When a user asks to turn on all devices of a specific type, ask user to specify an area, unless there is only one device of that type.
110
+ This device is not able to start timers."""
README.md ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: apache-2.0
4
+ base_model: HuggingFaceTB/SmolLM3-3B
5
+ tags:
6
+ - axolotl
7
+ - base_model:adapter:HuggingFaceTB/SmolLM3-3B
8
+ - lora
9
+ - transformers
10
+ datasets:
11
+ - train_file.jsonl
12
+ pipeline_tag: text-generation
13
+ model-index:
14
+ - name: outputs/smollm3
15
+ results: []
16
+ ---
17
+
18
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
+ should probably proofread and complete it, then remove this comment. -->
20
+
21
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
22
+ <details><summary>See axolotl config</summary>
23
+
24
+ axolotl version: `0.11.0`
25
+ ```yaml
26
+ # axolotl preprocess tasks/smollm3.yml
27
+ # axolotl train tasks/smollm3.yml
28
+ base_model: HuggingFaceTB/SmolLM3-3B
29
+
30
+ #adapter: lora
31
+ chat_template_jinja: |
32
+ {# ───── defaults ───── #}
33
+ {%- if enable_thinking is not defined -%}
34
+ {%- set enable_thinking = true -%}
35
+ {%- endif -%}
36
+
37
+ {# ───── reasoning mode ───── #}
38
+ {%- if enable_thinking -%}
39
+ {%- set reasoning_mode = "/think" -%}
40
+ {%- else -%}
41
+ {%- set reasoning_mode = "/no_think" -%}
42
+ {%- endif -%}
43
+
44
+ {# ───── header (system message) ───── #}
45
+ {{- "<|im_start|>system\n" -}}
46
+
47
+ {%- if messages[0].role == "system" -%}
48
+ {%- set system_message = messages[0].content -%}
49
+ {%- if "/no_think" in system_message -%}
50
+ {%- set reasoning_mode = "/no_think" -%}
51
+ {%- elif "/think" in system_message -%}
52
+ {%- set reasoning_mode = "/think" -%}
53
+ {%- endif -%}
54
+ {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
55
+ {%- endif -%}
56
+
57
+ {%- if "/system_override" in system_message -%}
58
+ {{- custom_instructions.replace("/system_override", "").rstrip() -}}
59
+ {{- "<|im_end|>\n" -}}
60
+ {%- else -%}
61
+ {{- "## Metadata\n\n" -}}
62
+ {{- "Knowledge Cutoff Date: June 2025\n" -}}
63
+ {%- set today = strftime_now("%d %B %Y") -%}
64
+ {{- "Today Date: " ~ today ~ "\n" -}}
65
+ {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}
66
+
67
+ {{- "## Custom Instructions\n\n" -}}
68
+ {%- if custom_instructions -%}
69
+ {{- custom_instructions + "\n\n" -}}
70
+ {%- elif reasoning_mode == "/think" -%}
71
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion.\n\n" -}}
72
+ {%- else -%}
73
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
74
+ {%- endif -%}
75
+
76
+ {%- if xml_tools or python_tools or tools -%}
77
+ {{- "### Tools\n\n" -}}
78
+ {%- if xml_tools or tools -%}
79
+ {%- if tools -%}
80
+ {%- set xml_tools = tools -%}
81
+ {%- endif -%}
82
+ {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
83
+ {%- for tool in xml_tools[:] -%} {# The slicing makes sure that xml_tools is a list #}
84
+ {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | string) ~ "\n" -%}
85
+ {%- endfor -%}
86
+ {%- set xml_tool_string = ns.xml_tool_string + "</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" -%}
87
+ {{- xml_tool_string -}}
88
+ {%- endif -%}
89
+ {%- if python_tools -%}
90
+ {%- set ns = namespace(python_tool_string="When you send a message containing Python code between '<code>' and '</code>' tags, it will be executed in a stateful Jupyter notebook environment, and you will then be given the output to continued reasoning in an agentic loop.\n\nYou can use the following tools in your python code like regular functions:\n<tools>\n") -%}
91
+ {%- for tool in python_tools[:] -%} {# The slicing makes sure that python_tools is a list #}
92
+ {%- set ns.python_tool_string = ns.python_tool_string ~ (tool | string) ~ "\n" -%}
93
+ {%- endfor -%}
94
+ {%- set python_tool_string = ns.python_tool_string + "</tools>\n\nThe state persists between code executions: so variables that you define in one step are still available thereafter." -%}
95
+ {{- python_tool_string -}}
96
+ {%- endif -%}
97
+ {{- "\n\n" -}}
98
+ {{- "<|im_end|>\n" -}}
99
+ {%- endif -%}
100
+ {%- endif -%}
101
+ {# ───── main loop ───── #}
102
+ {%- for message in messages -%}
103
+ {%- set content = message.content if message.content is string else "" -%}
104
+ {%- if message.role == "user" -%}
105
+ {{ "<|im_start|>" + message.role + "\n" + content + "<|im_end|>\n" }}
106
+ {%- elif message.role == "assistant" -%}
107
+ {% generation %}
108
+ {%- if reasoning_mode == "/think" -%}
109
+ {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
110
+ {%- else -%}
111
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
112
+ {%- endif -%}
113
+ {% endgeneration %}
114
+ {%- elif message.role == "tool" -%}
115
+ {{ "<|im_start|>" + "assistant\n" + content + "<|im_end|>\n" }}
116
+ {%- endif -%}
117
+ {%- endfor -%}
118
+ {# ───── generation prompt ───── #}
119
+ {%- if add_generation_prompt -%}
120
+ {%- if reasoning_mode == "/think" -%}
121
+ {{ "<|im_start|>assistant\n" }}
122
+ {%- else -%}
123
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" }}
124
+ {%- endif -%}
125
+ {%- endif -%}
126
+ datasets:
127
+ - path: train_file.jsonl
128
+ type: chat_template
129
+
130
+ dataset_prepared_path: last_run_prepared
131
+ val_set_size: 0.01
132
+ output_dir: ./outputs/smollm3
133
+
134
+ roles_to_train:
135
+ - assistant
136
+ - tool
137
+
138
+ adapter: lora
139
+ lora_r: 16
140
+ lora_alpha: 32
141
+ lora_dropout: 0.05
142
+ lora_target_linear: true
143
+
144
+ num_epochs: 2
145
+ learning_rate: 1e-4
146
+ optimizer: adamw_torch
147
+ lr_scheduler: cosine
148
+ flash_attention: true
149
+ sequence_len: 4096
150
+ logging_steps: 20
151
+ warmup_steps: 100
152
+ save_steps: 500
153
+ eval_steps: 500
154
+ save_total_limit: 2
155
+
156
+ batch_size: 1
157
+ micro_batch_size: 1
158
+ ```
159
+
160
+ </details><br>
161
+
162
+ # outputs/smollm3
163
+
164
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM3-3B](https://huggingface.co/HuggingFaceTB/SmolLM3-3B) on the train_file.jsonl dataset.
165
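+ It reports the following results on the evaluation set (the validation loss was NaN at every logged evaluation step, so no meaningful evaluation metric is available):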
166
+ - Loss: nan
167
+
168
+ ## Model description
169
+
170
+ More information needed
171
+
172
+ ## Intended uses & limitations
173
+
174
+ More information needed
175
+
176
+ ## Training and evaluation data
177
+
178
+ More information needed
179
+
180
+ ## Training procedure
181
+
182
+ ### Training hyperparameters
183
+
184
+ The following hyperparameters were used during training:
185
+ - learning_rate: 0.0001
186
+ - train_batch_size: 1
187
+ - eval_batch_size: 1
188
+ - seed: 42
189
+ - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
190
+ - lr_scheduler_type: cosine
191
+ - lr_scheduler_warmup_steps: 100
192
+ - training_steps: 4204
193
+
194
+ ### Training results
195
+
196
+ | Training Loss | Epoch | Step | Validation Loss |
197
+ |:-------------:|:------:|:----:|:---------------:|
198
+ | No log | 0 | 0 | nan |
199
+ | 0.2444 | 0.2379 | 500 | nan |
200
+ | 0.0655 | 0.4757 | 1000 | nan |
201
+ | 0.7555 | 0.7136 | 1500 | nan |
202
+ | 0.3809 | 0.9515 | 2000 | nan |
203
+ | 0.0237 | 1.1893 | 2500 | nan |
204
+ | 0.0749 | 1.4272 | 3000 | nan |
205
+ | 0.0896 | 1.6651 | 3500 | nan |
206
+ | 0.0 | 1.9029 | 4000 | nan |
207
+
208
+
209
+ ### Framework versions
210
+
211
+ - PEFT 0.16.0
212
+ - Transformers 4.53.3
213
+ - Pytorch 2.7.1+cu128
214
+ - Datasets 3.6.0
215
+ - Tokenizers 0.21.2
adapter_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceTB/SmolLM3-3B",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "up_proj",
29
+ "gate_proj",
30
+ "k_proj",
31
+ "q_proj",
32
+ "v_proj",
33
+ "down_proj",
34
+ "o_proj"
35
+ ],
36
+ "task_type": "CAUSAL_LM",
37
+ "trainable_token_indices": null,
38
+ "use_dora": false,
39
+ "use_qalora": false,
40
+ "use_rslora": false
41
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5f9c3bddde813745a86cb601f9d1069128c55abadd89820433b1d99a9baaa6
3
+ size 120981200
chat_template.jinja ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ───── defaults ───── #}
2
+ {%- if enable_thinking is not defined -%}
3
+ {%- set enable_thinking = true -%}
4
+ {%- endif -%}
5
+
6
+ {# ───── reasoning mode ───── #}
7
+ {%- if enable_thinking -%}
8
+ {%- set reasoning_mode = "/think" -%}
9
+ {%- else -%}
10
+ {%- set reasoning_mode = "/no_think" -%}
11
+ {%- endif -%}
12
+
13
+ {# ───── header (system message) ───── #}
14
+ {{- "<|im_start|>system\n" -}}
15
+
16
+ {%- if messages[0].role == "system" -%}
17
+ {%- set system_message = messages[0].content -%}
18
+ {%- if "/no_think" in system_message -%}
19
+ {%- set reasoning_mode = "/no_think" -%}
20
+ {%- elif "/think" in system_message -%}
21
+ {%- set reasoning_mode = "/think" -%}
22
+ {%- endif -%}
23
+ {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
24
+ {%- endif -%}
25
+
26
+ {%- if "/system_override" in system_message -%}
27
+ {{- custom_instructions.replace("/system_override", "").rstrip() -}}
28
+ {{- "<|im_end|>\n" -}}
29
+ {%- else -%}
30
+ {{- "## Metadata\n\n" -}}
31
+ {{- "Knowledge Cutoff Date: June 2025\n" -}}
32
+ {%- set today = strftime_now("%d %B %Y") -%}
33
+ {{- "Today Date: " ~ today ~ "\n" -}}
34
+ {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}
35
+
36
+ {{- "## Custom Instructions\n\n" -}}
37
+ {%- if custom_instructions -%}
38
+ {{- custom_instructions + "\n\n" -}}
39
+ {%- elif reasoning_mode == "/think" -%}
40
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion.\n\n" -}}
41
+ {%- else -%}
42
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
43
+ {%- endif -%}
44
+
45
+ {%- if xml_tools or python_tools or tools -%}
46
+ {{- "### Tools\n\n" -}}
47
+ {%- if xml_tools or tools -%}
48
+ {%- if tools -%}
49
+ {%- set xml_tools = tools -%}
50
+ {%- endif -%}
51
+ {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
52
+ {%- for tool in xml_tools[:] -%} {# The slicing makes sure that xml_tools is a list #}
53
+ {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | string) ~ "\n" -%}
54
+ {%- endfor -%}
55
+ {%- set xml_tool_string = ns.xml_tool_string + "</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" -%}
56
+ {{- xml_tool_string -}}
57
+ {%- endif -%}
58
+ {%- if python_tools -%}
59
+ {%- set ns = namespace(python_tool_string="When you send a message containing Python code between '<code>' and '</code>' tags, it will be executed in a stateful Jupyter notebook environment, and you will then be given the output to continued reasoning in an agentic loop.\n\nYou can use the following tools in your python code like regular functions:\n<tools>\n") -%}
60
+ {%- for tool in python_tools[:] -%} {# The slicing makes sure that python_tools is a list #}
61
+ {%- set ns.python_tool_string = ns.python_tool_string ~ (tool | string) ~ "\n" -%}
62
+ {%- endfor -%}
63
+ {%- set python_tool_string = ns.python_tool_string + "</tools>\n\nThe state persists between code executions: so variables that you define in one step are still available thereafter." -%}
64
+ {{- python_tool_string -}}
65
+ {%- endif -%}
66
+ {{- "\n\n" -}}
67
+ {{- "<|im_end|>\n" -}}
68
+ {%- endif -%}
69
+ {%- endif -%}
70
+ {# ───── main loop ───── #}
71
+ {%- for message in messages -%}
72
+ {%- set content = message.content if message.content is string else "" -%}
73
+ {%- if message.role == "user" -%}
74
+ {{ "<|im_start|>" + message.role + "\n" + content + "<|im_end|>\n" }}
75
+ {%- elif message.role == "assistant" -%}
76
+ {% generation %}
77
+ {%- if reasoning_mode == "/think" -%}
78
+ {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
79
+ {%- else -%}
80
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
81
+ {%- endif -%}
82
+ {% endgeneration %}
83
+ {%- elif message.role == "tool" -%}
84
+ {{ "<|im_start|>" + "assistant\n" + content + "<|im_end|>\n" }}
85
+ {%- endif -%}
86
+ {%- endfor -%}
87
+ {# ───── generation prompt ───── #}
88
+ {%- if add_generation_prompt -%}
89
+ {%- if reasoning_mode == "/think" -%}
90
+ {{ "<|im_start|>assistant\n" }}
91
+ {%- else -%}
92
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" }}
93
+ {%- endif -%}
94
+ {%- endif -%}
checkpoint-4000/README.md ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: HuggingFaceTB/SmolLM3-3B
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - axolotl
7
+ - base_model:adapter:HuggingFaceTB/SmolLM3-3B
8
+ - lora
9
+ - transformers
10
+ ---
11
+
12
+ # Model Card for Model ID
13
+
14
+ <!-- Provide a quick summary of what the model is/does. -->
15
+
16
+
17
+
18
+ ## Model Details
19
+
20
+ ### Model Description
21
+
22
+ <!-- Provide a longer summary of what this model is. -->
23
+
24
+
25
+
26
+ - **Developed by:** [More Information Needed]
27
+ - **Funded by [optional]:** [More Information Needed]
28
+ - **Shared by [optional]:** [More Information Needed]
29
+ - **Model type:** [More Information Needed]
30
+ - **Language(s) (NLP):** [More Information Needed]
31
+ - **License:** [More Information Needed]
32
+ - **Finetuned from model [optional]:** [More Information Needed]
33
+
34
+ ### Model Sources [optional]
35
+
36
+ <!-- Provide the basic links for the model. -->
37
+
38
+ - **Repository:** [More Information Needed]
39
+ - **Paper [optional]:** [More Information Needed]
40
+ - **Demo [optional]:** [More Information Needed]
41
+
42
+ ## Uses
43
+
44
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
45
+
46
+ ### Direct Use
47
+
48
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Downstream Use [optional]
53
+
54
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
55
+
56
+ [More Information Needed]
57
+
58
+ ### Out-of-Scope Use
59
+
60
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ## Bias, Risks, and Limitations
65
+
66
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
67
+
68
+ [More Information Needed]
69
+
70
+ ### Recommendations
71
+
72
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
73
+
74
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
75
+
76
+ ## How to Get Started with the Model
77
+
78
+ Use the code below to get started with the model.
79
+
80
+ [More Information Needed]
81
+
82
+ ## Training Details
83
+
84
+ ### Training Data
85
+
86
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
87
+
88
+ [More Information Needed]
89
+
90
+ ### Training Procedure
91
+
92
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
93
+
94
+ #### Preprocessing [optional]
95
+
96
+ [More Information Needed]
97
+
98
+
99
+ #### Training Hyperparameters
100
+
101
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
102
+
103
+ #### Speeds, Sizes, Times [optional]
104
+
105
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
106
+
107
+ [More Information Needed]
108
+
109
+ ## Evaluation
110
+
111
+ <!-- This section describes the evaluation protocols and provides the results. -->
112
+
113
+ ### Testing Data, Factors & Metrics
114
+
115
+ #### Testing Data
116
+
117
+ <!-- This should link to a Dataset Card if possible. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Factors
122
+
123
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
124
+
125
+ [More Information Needed]
126
+
127
+ #### Metrics
128
+
129
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
130
+
131
+ [More Information Needed]
132
+
133
+ ### Results
134
+
135
+ [More Information Needed]
136
+
137
+ #### Summary
138
+
139
+
140
+
141
+ ## Model Examination [optional]
142
+
143
+ <!-- Relevant interpretability work for the model goes here -->
144
+
145
+ [More Information Needed]
146
+
147
+ ## Environmental Impact
148
+
149
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
150
+
151
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
152
+
153
+ - **Hardware Type:** [More Information Needed]
154
+ - **Hours used:** [More Information Needed]
155
+ - **Cloud Provider:** [More Information Needed]
156
+ - **Compute Region:** [More Information Needed]
157
+ - **Carbon Emitted:** [More Information Needed]
158
+
159
+ ## Technical Specifications [optional]
160
+
161
+ ### Model Architecture and Objective
162
+
163
+ [More Information Needed]
164
+
165
+ ### Compute Infrastructure
166
+
167
+ [More Information Needed]
168
+
169
+ #### Hardware
170
+
171
+ [More Information Needed]
172
+
173
+ #### Software
174
+
175
+ [More Information Needed]
176
+
177
+ ## Citation [optional]
178
+
179
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
180
+
181
+ **BibTeX:**
182
+
183
+ [More Information Needed]
184
+
185
+ **APA:**
186
+
187
+ [More Information Needed]
188
+
189
+ ## Glossary [optional]
190
+
191
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
192
+
193
+ [More Information Needed]
194
+
195
+ ## More Information [optional]
196
+
197
+ [More Information Needed]
198
+
199
+ ## Model Card Authors [optional]
200
+
201
+ [More Information Needed]
202
+
203
+ ## Model Card Contact
204
+
205
+ [More Information Needed]
206
+ ### Framework versions
207
+
208
+ - PEFT 0.16.0
checkpoint-4000/adapter_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceTB/SmolLM3-3B",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "up_proj",
29
+ "gate_proj",
30
+ "k_proj",
31
+ "q_proj",
32
+ "v_proj",
33
+ "down_proj",
34
+ "o_proj"
35
+ ],
36
+ "task_type": "CAUSAL_LM",
37
+ "trainable_token_indices": null,
38
+ "use_dora": false,
39
+ "use_qalora": false,
40
+ "use_rslora": false
41
+ }
checkpoint-4000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35f65df3957596b103f952b86477d6e377eed392dfe6f168180e54daeb0483a2
3
+ size 120981200
checkpoint-4000/chat_template.jinja ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ───── defaults ───── #}
2
+ {%- if enable_thinking is not defined -%}
3
+ {%- set enable_thinking = true -%}
4
+ {%- endif -%}
5
+
6
+ {# ───── reasoning mode ───── #}
7
+ {%- if enable_thinking -%}
8
+ {%- set reasoning_mode = "/think" -%}
9
+ {%- else -%}
10
+ {%- set reasoning_mode = "/no_think" -%}
11
+ {%- endif -%}
12
+
13
+ {# ───── header (system message) ───── #}
14
+ {{- "<|im_start|>system\n" -}}
15
+
16
+ {%- if messages[0].role == "system" -%}
17
+ {%- set system_message = messages[0].content -%}
18
+ {%- if "/no_think" in system_message -%}
19
+ {%- set reasoning_mode = "/no_think" -%}
20
+ {%- elif "/think" in system_message -%}
21
+ {%- set reasoning_mode = "/think" -%}
22
+ {%- endif -%}
23
+ {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
24
+ {%- endif -%}
25
+
26
+ {%- if "/system_override" in system_message -%}
27
+ {{- custom_instructions.replace("/system_override", "").rstrip() -}}
28
+ {{- "<|im_end|>\n" -}}
29
+ {%- else -%}
30
+ {{- "## Metadata\n\n" -}}
31
+ {{- "Knowledge Cutoff Date: June 2025\n" -}}
32
+ {%- set today = strftime_now("%d %B %Y") -%}
33
+ {{- "Today Date: " ~ today ~ "\n" -}}
34
+ {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}
35
+
36
+ {{- "## Custom Instructions\n\n" -}}
37
+ {%- if custom_instructions -%}
38
+ {{- custom_instructions + "\n\n" -}}
39
+ {%- elif reasoning_mode == "/think" -%}
40
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion.\n\n" -}}
41
+ {%- else -%}
42
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
43
+ {%- endif -%}
44
+
45
+ {%- if xml_tools or python_tools or tools -%}
46
+ {{- "### Tools\n\n" -}}
47
+ {%- if xml_tools or tools -%}
48
+ {%- if tools -%}
49
+ {%- set xml_tools = tools -%}
50
+ {%- endif -%}
51
+ {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
52
+ {%- for tool in xml_tools[:] -%} {# The slicing makes sure that xml_tools is a list #}
53
+ {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | string) ~ "\n" -%}
54
+ {%- endfor -%}
55
+ {%- set xml_tool_string = ns.xml_tool_string + "</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" -%}
56
+ {{- xml_tool_string -}}
57
+ {%- endif -%}
58
+ {%- if python_tools -%}
59
+ {%- set ns = namespace(python_tool_string="When you send a message containing Python code between '<code>' and '</code>' tags, it will be executed in a stateful Jupyter notebook environment, and you will then be given the output to continued reasoning in an agentic loop.\n\nYou can use the following tools in your python code like regular functions:\n<tools>\n") -%}
60
+ {%- for tool in python_tools[:] -%} {# The slicing makes sure that python_tools is a list #}
61
+ {%- set ns.python_tool_string = ns.python_tool_string ~ (tool | string) ~ "\n" -%}
62
+ {%- endfor -%}
63
+ {%- set python_tool_string = ns.python_tool_string + "</tools>\n\nThe state persists between code executions: so variables that you define in one step are still available thereafter." -%}
64
+ {{- python_tool_string -}}
65
+ {%- endif -%}
66
+ {{- "\n\n" -}}
67
+ {{- "<|im_end|>\n" -}}
68
+ {%- endif -%}
69
+ {%- endif -%}
70
+ {# ───── main loop ───── #}
71
+ {%- for message in messages -%}
72
+ {%- set content = message.content if message.content is string else "" -%}
73
+ {%- if message.role == "user" -%}
74
+ {{ "<|im_start|>" + message.role + "\n" + content + "<|im_end|>\n" }}
75
+ {%- elif message.role == "assistant" -%}
76
+ {% generation %}
77
+ {%- if reasoning_mode == "/think" -%}
78
+ {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
79
+ {%- else -%}
80
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
81
+ {%- endif -%}
82
+ {% endgeneration %}
83
+ {%- elif message.role == "tool" -%}
84
+ {{ "<|im_start|>" + "assistant\n" + content + "<|im_end|>\n" }}
85
+ {%- endif -%}
86
+ {%- endfor -%}
87
+ {# ───── generation prompt ───── #}
88
+ {%- if add_generation_prompt -%}
89
+ {%- if reasoning_mode == "/think" -%}
90
+ {{ "<|im_start|>assistant\n" }}
91
+ {%- else -%}
92
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" }}
93
+ {%- endif -%}
94
+ {%- endif -%}
checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aaf4d87538fd14bbd25d8aad4b6e49e8bd7ea6991ab98ba32c2a78f772db628
3
+ size 242252619
checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aa2de77f238bb260dc729355736bb4b246a7e99f90c8d784ab2f63c8e703887
3
+ size 14645
checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb618c1d5fd7d1f63218c89e181acbb18671f8f3e27ec18ff8c0a794c3d7f67b
3
+ size 1465
checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "<|im_end|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ }
16
+ }
checkpoint-4000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b6a500b662a34eb3f0374db856ba4ad7de4c81040571d78dc0d357238930005
3
+ size 17208819
checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,2064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<think>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "128003": {
28
+ "content": "</think>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|im_start|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|im_end|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<tool_response>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "128014": {
116
+ "content": "</tool_response>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "128015": {
124
+ "content": "<tool_call>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "128016": {
132
+ "content": "</tool_call>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "128017": {
140
+ "content": "<code>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "128018": {
148
+ "content": "</code>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": null,
2053
+ "clean_up_tokenization_spaces": true,
2054
+ "eos_token": "<|im_end|>",
2055
+ "extra_special_tokens": {},
2056
+ "fast": false,
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
+ "model_max_length": 131072,
2062
+ "pad_token": "<|im_end|>",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
+ }
checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,1506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.9029495718363463,
6
+ "eval_steps": 500,
7
+ "global_step": 4000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0,
14
+ "eval_loss": NaN,
15
+ "eval_runtime": 4.5948,
16
+ "eval_samples_per_second": 4.788,
17
+ "eval_steps_per_second": 4.788,
18
+ "step": 0
19
+ },
20
+ {
21
+ "epoch": 0.009514747859181731,
22
+ "grad_norm": 0.0,
23
+ "learning_rate": 1.9e-05,
24
+ "loss": 2.0934,
25
+ "step": 20
26
+ },
27
+ {
28
+ "epoch": 0.019029495718363463,
29
+ "grad_norm": 0.0,
30
+ "learning_rate": 3.9000000000000006e-05,
31
+ "loss": 0.4738,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.028544243577545196,
36
+ "grad_norm": 0.0,
37
+ "learning_rate": 5.9e-05,
38
+ "loss": 0.8557,
39
+ "step": 60
40
+ },
41
+ {
42
+ "epoch": 0.038058991436726926,
43
+ "grad_norm": 0.0,
44
+ "learning_rate": 7.900000000000001e-05,
45
+ "loss": 0.3978,
46
+ "step": 80
47
+ },
48
+ {
49
+ "epoch": 0.047573739295908656,
50
+ "grad_norm": 0.0,
51
+ "learning_rate": 9.900000000000001e-05,
52
+ "loss": 0.4128,
53
+ "step": 100
54
+ },
55
+ {
56
+ "epoch": 0.05708848715509039,
57
+ "grad_norm": 0.0,
58
+ "learning_rate": 9.999471159635539e-05,
59
+ "loss": 0.148,
60
+ "step": 120
61
+ },
62
+ {
63
+ "epoch": 0.06660323501427212,
64
+ "grad_norm": 0.0,
65
+ "learning_rate": 9.997771965008657e-05,
66
+ "loss": 0.3554,
67
+ "step": 140
68
+ },
69
+ {
70
+ "epoch": 0.07611798287345385,
71
+ "grad_norm": 0.0,
72
+ "learning_rate": 9.994901349433758e-05,
73
+ "loss": 1.0388,
74
+ "step": 160
75
+ },
76
+ {
77
+ "epoch": 0.08563273073263558,
78
+ "grad_norm": 7.0538105964660645,
79
+ "learning_rate": 9.990859985750506e-05,
80
+ "loss": 0.6268,
81
+ "step": 180
82
+ },
83
+ {
84
+ "epoch": 0.09514747859181731,
85
+ "grad_norm": 6.37161111831665,
86
+ "learning_rate": 9.985648821208616e-05,
87
+ "loss": 0.1791,
88
+ "step": 200
89
+ },
90
+ {
91
+ "epoch": 0.10466222645099905,
92
+ "grad_norm": 0.0,
93
+ "learning_rate": 9.979269077245831e-05,
94
+ "loss": 0.0,
95
+ "step": 220
96
+ },
97
+ {
98
+ "epoch": 0.11417697431018078,
99
+ "grad_norm": 0.0,
100
+ "learning_rate": 9.97172224920163e-05,
101
+ "loss": 0.4112,
102
+ "step": 240
103
+ },
104
+ {
105
+ "epoch": 0.12369172216936251,
106
+ "grad_norm": 4.980894565582275,
107
+ "learning_rate": 9.963010105966736e-05,
108
+ "loss": 0.2527,
109
+ "step": 260
110
+ },
111
+ {
112
+ "epoch": 0.13320647002854424,
113
+ "grad_norm": 0.0,
114
+ "learning_rate": 9.953134689568506e-05,
115
+ "loss": 0.639,
116
+ "step": 280
117
+ },
118
+ {
119
+ "epoch": 0.142721217887726,
120
+ "grad_norm": 0.0,
121
+ "learning_rate": 9.94209831469231e-05,
122
+ "loss": 0.5911,
123
+ "step": 300
124
+ },
125
+ {
126
+ "epoch": 0.1522359657469077,
127
+ "grad_norm": 0.0,
128
+ "learning_rate": 9.929903568138989e-05,
129
+ "loss": 0.0966,
130
+ "step": 320
131
+ },
132
+ {
133
+ "epoch": 0.16175071360608945,
134
+ "grad_norm": 0.0,
135
+ "learning_rate": 9.916553308218537e-05,
136
+ "loss": 0.4412,
137
+ "step": 340
138
+ },
139
+ {
140
+ "epoch": 0.17126546146527116,
141
+ "grad_norm": 0.0,
142
+ "learning_rate": 9.902050664080152e-05,
143
+ "loss": 0.3402,
144
+ "step": 360
145
+ },
146
+ {
147
+ "epoch": 0.1807802093244529,
148
+ "grad_norm": 0.0,
149
+ "learning_rate": 9.886399034978797e-05,
150
+ "loss": 0.1149,
151
+ "step": 380
152
+ },
153
+ {
154
+ "epoch": 0.19029495718363462,
155
+ "grad_norm": 0.0,
156
+ "learning_rate": 9.869602089478447e-05,
157
+ "loss": 0.1225,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 0.19980970504281637,
162
+ "grad_norm": 0.0,
163
+ "learning_rate": 9.85166376459223e-05,
164
+ "loss": 0.0718,
165
+ "step": 420
166
+ },
167
+ {
168
+ "epoch": 0.2093244529019981,
169
+ "grad_norm": 0.0,
170
+ "learning_rate": 9.832588264859624e-05,
171
+ "loss": 0.3826,
172
+ "step": 440
173
+ },
174
+ {
175
+ "epoch": 0.21883920076117983,
176
+ "grad_norm": 3.183824300765991,
177
+ "learning_rate": 9.81238006136097e-05,
178
+ "loss": 0.4913,
179
+ "step": 460
180
+ },
181
+ {
182
+ "epoch": 0.22835394862036157,
183
+ "grad_norm": 0.0,
184
+ "learning_rate": 9.7910438906695e-05,
185
+ "loss": 0.0751,
186
+ "step": 480
187
+ },
188
+ {
189
+ "epoch": 0.23786869647954328,
190
+ "grad_norm": 0.0,
191
+ "learning_rate": 9.768584753741134e-05,
192
+ "loss": 0.2444,
193
+ "step": 500
194
+ },
195
+ {
196
+ "epoch": 0.23786869647954328,
197
+ "eval_loss": NaN,
198
+ "eval_runtime": 8.8015,
199
+ "eval_samples_per_second": 2.5,
200
+ "eval_steps_per_second": 2.5,
201
+ "step": 500
202
+ },
203
+ {
204
+ "epoch": 0.24738344433872503,
205
+ "grad_norm": 0.0,
206
+ "learning_rate": 9.745007914742316e-05,
207
+ "loss": 0.1088,
208
+ "step": 520
209
+ },
210
+ {
211
+ "epoch": 0.25689819219790677,
212
+ "grad_norm": 0.0,
213
+ "learning_rate": 9.720318899816155e-05,
214
+ "loss": 0.1922,
215
+ "step": 540
216
+ },
217
+ {
218
+ "epoch": 0.2664129400570885,
219
+ "grad_norm": 0.0,
220
+ "learning_rate": 9.694523495787149e-05,
221
+ "loss": 0.2443,
222
+ "step": 560
223
+ },
224
+ {
225
+ "epoch": 0.2759276879162702,
226
+ "grad_norm": 0.0,
227
+ "learning_rate": 9.667627748804835e-05,
228
+ "loss": 0.0,
229
+ "step": 580
230
+ },
231
+ {
232
+ "epoch": 0.285442435775452,
233
+ "grad_norm": 0.0,
234
+ "learning_rate": 9.639637962926633e-05,
235
+ "loss": 0.6168,
236
+ "step": 600
237
+ },
238
+ {
239
+ "epoch": 0.2949571836346337,
240
+ "grad_norm": 0.0,
241
+ "learning_rate": 9.610560698640241e-05,
242
+ "loss": 0.249,
243
+ "step": 620
244
+ },
245
+ {
246
+ "epoch": 0.3044719314938154,
247
+ "grad_norm": 0.0,
248
+ "learning_rate": 9.580402771325941e-05,
249
+ "loss": 0.1383,
250
+ "step": 640
251
+ },
252
+ {
253
+ "epoch": 0.3139866793529971,
254
+ "grad_norm": 0.0,
255
+ "learning_rate": 9.549171249659145e-05,
256
+ "loss": 0.0658,
257
+ "step": 660
258
+ },
259
+ {
260
+ "epoch": 0.3235014272121789,
261
+ "grad_norm": 0.0,
262
+ "learning_rate": 9.51687345395358e-05,
263
+ "loss": 0.7962,
264
+ "step": 680
265
+ },
266
+ {
267
+ "epoch": 0.3330161750713606,
268
+ "grad_norm": 0.0,
269
+ "learning_rate": 9.483516954445489e-05,
270
+ "loss": 0.1979,
271
+ "step": 700
272
+ },
273
+ {
274
+ "epoch": 0.3425309229305423,
275
+ "grad_norm": 0.0,
276
+ "learning_rate": 9.449109569519253e-05,
277
+ "loss": 0.0846,
278
+ "step": 720
279
+ },
280
+ {
281
+ "epoch": 0.3520456707897241,
282
+ "grad_norm": 0.0,
283
+ "learning_rate": 9.413659363874853e-05,
284
+ "loss": 0.0,
285
+ "step": 740
286
+ },
287
+ {
288
+ "epoch": 0.3615604186489058,
289
+ "grad_norm": 0.0,
290
+ "learning_rate": 9.377174646637597e-05,
291
+ "loss": 0.6378,
292
+ "step": 760
293
+ },
294
+ {
295
+ "epoch": 0.37107516650808753,
296
+ "grad_norm": 0.0,
297
+ "learning_rate": 9.339663969410546e-05,
298
+ "loss": 0.1937,
299
+ "step": 780
300
+ },
301
+ {
302
+ "epoch": 0.38058991436726924,
303
+ "grad_norm": 0.0,
304
+ "learning_rate": 9.301136124270126e-05,
305
+ "loss": 0.1475,
306
+ "step": 800
307
+ },
308
+ {
309
+ "epoch": 0.390104662226451,
310
+ "grad_norm": 0.0,
311
+ "learning_rate": 9.261600141705367e-05,
312
+ "loss": 0.4155,
313
+ "step": 820
314
+ },
315
+ {
316
+ "epoch": 0.39961941008563273,
317
+ "grad_norm": 0.0,
318
+ "learning_rate": 9.221065288501245e-05,
319
+ "loss": 0.4923,
320
+ "step": 840
321
+ },
322
+ {
323
+ "epoch": 0.40913415794481445,
324
+ "grad_norm": 0.6096606254577637,
325
+ "learning_rate": 9.179541065566664e-05,
326
+ "loss": 0.1229,
327
+ "step": 860
328
+ },
329
+ {
330
+ "epoch": 0.4186489058039962,
331
+ "grad_norm": 0.0,
332
+ "learning_rate": 9.137037205707552e-05,
333
+ "loss": 0.0,
334
+ "step": 880
335
+ },
336
+ {
337
+ "epoch": 0.42816365366317793,
338
+ "grad_norm": 0.0,
339
+ "learning_rate": 9.093563671345598e-05,
340
+ "loss": 0.165,
341
+ "step": 900
342
+ },
343
+ {
344
+ "epoch": 0.43767840152235965,
345
+ "grad_norm": 0.0,
346
+ "learning_rate": 9.049130652183166e-05,
347
+ "loss": 0.6362,
348
+ "step": 920
349
+ },
350
+ {
351
+ "epoch": 0.44719314938154137,
352
+ "grad_norm": 0.0,
353
+ "learning_rate": 9.003748562814962e-05,
354
+ "loss": 0.0,
355
+ "step": 940
356
+ },
357
+ {
358
+ "epoch": 0.45670789724072314,
359
+ "grad_norm": 0.0,
360
+ "learning_rate": 8.957428040286959e-05,
361
+ "loss": 0.0507,
362
+ "step": 960
363
+ },
364
+ {
365
+ "epoch": 0.46622264509990485,
366
+ "grad_norm": 0.0,
367
+ "learning_rate": 8.910179941603193e-05,
368
+ "loss": 0.0257,
369
+ "step": 980
370
+ },
371
+ {
372
+ "epoch": 0.47573739295908657,
373
+ "grad_norm": 4.7469587326049805,
374
+ "learning_rate": 8.862015341181018e-05,
375
+ "loss": 0.0655,
376
+ "step": 1000
377
+ },
378
+ {
379
+ "epoch": 0.47573739295908657,
380
+ "eval_loss": NaN,
381
+ "eval_runtime": 7.7821,
382
+ "eval_samples_per_second": 2.827,
383
+ "eval_steps_per_second": 2.827,
384
+ "step": 1000
385
+ },
386
+ {
387
+ "epoch": 0.48525214081826834,
388
+ "grad_norm": 3.7392077445983887,
389
+ "learning_rate": 8.812945528255368e-05,
390
+ "loss": 0.4831,
391
+ "step": 1020
392
+ },
393
+ {
394
+ "epoch": 0.49476688867745006,
395
+ "grad_norm": 0.0,
396
+ "learning_rate": 8.762982004232709e-05,
397
+ "loss": 0.0065,
398
+ "step": 1040
399
+ },
400
+ {
401
+ "epoch": 0.5042816365366318,
402
+ "grad_norm": 0.0,
403
+ "learning_rate": 8.712136479995226e-05,
404
+ "loss": 0.0152,
405
+ "step": 1060
406
+ },
407
+ {
408
+ "epoch": 0.5137963843958135,
409
+ "grad_norm": 0.0,
410
+ "learning_rate": 8.66042087315593e-05,
411
+ "loss": 0.079,
412
+ "step": 1080
413
+ },
414
+ {
415
+ "epoch": 0.5233111322549953,
416
+ "grad_norm": 0.0,
417
+ "learning_rate": 8.60784730526531e-05,
418
+ "loss": 1.2652,
419
+ "step": 1100
420
+ },
421
+ {
422
+ "epoch": 0.532825880114177,
423
+ "grad_norm": 0.0,
424
+ "learning_rate": 8.554428098970171e-05,
425
+ "loss": 0.0,
426
+ "step": 1120
427
+ },
428
+ {
429
+ "epoch": 0.5423406279733587,
430
+ "grad_norm": 0.0,
431
+ "learning_rate": 8.50017577512535e-05,
432
+ "loss": 0.0428,
433
+ "step": 1140
434
+ },
435
+ {
436
+ "epoch": 0.5518553758325404,
437
+ "grad_norm": 2.9529595375061035,
438
+ "learning_rate": 8.445103049858966e-05,
439
+ "loss": 0.3298,
440
+ "step": 1160
441
+ },
442
+ {
443
+ "epoch": 0.5613701236917221,
444
+ "grad_norm": 0.0,
445
+ "learning_rate": 8.389222831591914e-05,
446
+ "loss": 0.0,
447
+ "step": 1180
448
+ },
449
+ {
450
+ "epoch": 0.570884871550904,
451
+ "grad_norm": 0.0,
452
+ "learning_rate": 8.332548218012263e-05,
453
+ "loss": 0.0825,
454
+ "step": 1200
455
+ },
456
+ {
457
+ "epoch": 0.5803996194100857,
458
+ "grad_norm": 0.0,
459
+ "learning_rate": 8.275092493005321e-05,
460
+ "loss": 0.2001,
461
+ "step": 1220
462
+ },
463
+ {
464
+ "epoch": 0.5899143672692674,
465
+ "grad_norm": 0.0,
466
+ "learning_rate": 8.216869123540034e-05,
467
+ "loss": 0.6001,
468
+ "step": 1240
469
+ },
470
+ {
471
+ "epoch": 0.5994291151284491,
472
+ "grad_norm": 1.827052116394043,
473
+ "learning_rate": 8.157891756512488e-05,
474
+ "loss": 0.0177,
475
+ "step": 1260
476
+ },
477
+ {
478
+ "epoch": 0.6089438629876308,
479
+ "grad_norm": 0.0,
480
+ "learning_rate": 8.098174215547224e-05,
481
+ "loss": 0.0,
482
+ "step": 1280
483
+ },
484
+ {
485
+ "epoch": 0.6184586108468125,
486
+ "grad_norm": 0.0,
487
+ "learning_rate": 8.037730497757143e-05,
488
+ "loss": 0.2685,
489
+ "step": 1300
490
+ },
491
+ {
492
+ "epoch": 0.6279733587059942,
493
+ "grad_norm": 0.0,
494
+ "learning_rate": 7.976574770462743e-05,
495
+ "loss": 0.0258,
496
+ "step": 1320
497
+ },
498
+ {
499
+ "epoch": 0.6374881065651761,
500
+ "grad_norm": 0.0,
501
+ "learning_rate": 7.914721367871454e-05,
502
+ "loss": 0.0001,
503
+ "step": 1340
504
+ },
505
+ {
506
+ "epoch": 0.6470028544243578,
507
+ "grad_norm": 0.0,
508
+ "learning_rate": 7.852184787717871e-05,
509
+ "loss": 0.422,
510
+ "step": 1360
511
+ },
512
+ {
513
+ "epoch": 0.6565176022835395,
514
+ "grad_norm": 3.8072144985198975,
515
+ "learning_rate": 7.788979687865639e-05,
516
+ "loss": 0.409,
517
+ "step": 1380
518
+ },
519
+ {
520
+ "epoch": 0.6660323501427212,
521
+ "grad_norm": 0.0,
522
+ "learning_rate": 7.725120882871828e-05,
523
+ "loss": 0.4012,
524
+ "step": 1400
525
+ },
526
+ {
527
+ "epoch": 0.6755470980019029,
528
+ "grad_norm": 0.0,
529
+ "learning_rate": 7.660623340514561e-05,
530
+ "loss": 0.0639,
531
+ "step": 1420
532
+ },
533
+ {
534
+ "epoch": 0.6850618458610847,
535
+ "grad_norm": 0.0,
536
+ "learning_rate": 7.595502178284743e-05,
537
+ "loss": 0.1534,
538
+ "step": 1440
539
+ },
540
+ {
541
+ "epoch": 0.6945765937202664,
542
+ "grad_norm": 0.0,
543
+ "learning_rate": 7.529772659842685e-05,
544
+ "loss": 0.0362,
545
+ "step": 1460
546
+ },
547
+ {
548
+ "epoch": 0.7040913415794482,
549
+ "grad_norm": 0.0,
550
+ "learning_rate": 7.46345019144048e-05,
551
+ "loss": 0.0066,
552
+ "step": 1480
553
+ },
554
+ {
555
+ "epoch": 0.7136060894386299,
556
+ "grad_norm": 0.0,
557
+ "learning_rate": 7.396550318310957e-05,
558
+ "loss": 0.7555,
559
+ "step": 1500
560
+ },
561
+ {
562
+ "epoch": 0.7136060894386299,
563
+ "eval_loss": NaN,
564
+ "eval_runtime": 7.7236,
565
+ "eval_samples_per_second": 2.848,
566
+ "eval_steps_per_second": 2.848,
567
+ "step": 1500
568
+ },
569
+ {
570
+ "epoch": 0.7231208372978116,
571
+ "grad_norm": 1.2502919435501099,
572
+ "learning_rate": 7.32908872102405e-05,
573
+ "loss": 0.1364,
574
+ "step": 1520
575
+ },
576
+ {
577
+ "epoch": 0.7326355851569933,
578
+ "grad_norm": 0.0,
579
+ "learning_rate": 7.261081211811444e-05,
580
+ "loss": 0.199,
581
+ "step": 1540
582
+ },
583
+ {
584
+ "epoch": 0.7421503330161751,
585
+ "grad_norm": 0.0,
586
+ "learning_rate": 7.192543730860388e-05,
587
+ "loss": 0.2509,
588
+ "step": 1560
589
+ },
590
+ {
591
+ "epoch": 0.7516650808753568,
592
+ "grad_norm": 0.0,
593
+ "learning_rate": 7.123492342577472e-05,
594
+ "loss": 0.1916,
595
+ "step": 1580
596
+ },
597
+ {
598
+ "epoch": 0.7611798287345385,
599
+ "grad_norm": 0.0,
600
+ "learning_rate": 7.053943231823328e-05,
601
+ "loss": 0.3898,
602
+ "step": 1600
603
+ },
604
+ {
605
+ "epoch": 0.7706945765937203,
606
+ "grad_norm": 3.7327072620391846,
607
+ "learning_rate": 6.98391270011908e-05,
608
+ "loss": 0.1748,
609
+ "step": 1620
610
+ },
611
+ {
612
+ "epoch": 0.780209324452902,
613
+ "grad_norm": 0.0,
614
+ "learning_rate": 6.91341716182545e-05,
615
+ "loss": 0.1127,
616
+ "step": 1640
617
+ },
618
+ {
619
+ "epoch": 0.7897240723120837,
620
+ "grad_norm": 0.0,
621
+ "learning_rate": 6.842473140295417e-05,
622
+ "loss": 0.0495,
623
+ "step": 1660
624
+ },
625
+ {
626
+ "epoch": 0.7992388201712655,
627
+ "grad_norm": 0.0,
628
+ "learning_rate": 6.77109726400134e-05,
629
+ "loss": 0.1478,
630
+ "step": 1680
631
+ },
632
+ {
633
+ "epoch": 0.8087535680304472,
634
+ "grad_norm": 0.0,
635
+ "learning_rate": 6.69930626263742e-05,
636
+ "loss": 0.5869,
637
+ "step": 1700
638
+ },
639
+ {
640
+ "epoch": 0.8182683158896289,
641
+ "grad_norm": 0.0,
642
+ "learning_rate": 6.627116963198458e-05,
643
+ "loss": 0.1457,
644
+ "step": 1720
645
+ },
646
+ {
647
+ "epoch": 0.8277830637488106,
648
+ "grad_norm": 0.0,
649
+ "learning_rate": 6.554546286035802e-05,
650
+ "loss": 0.671,
651
+ "step": 1740
652
+ },
653
+ {
654
+ "epoch": 0.8372978116079924,
655
+ "grad_norm": 10.052992820739746,
656
+ "learning_rate": 6.481611240891396e-05,
657
+ "loss": 0.5565,
658
+ "step": 1760
659
+ },
660
+ {
661
+ "epoch": 0.8468125594671742,
662
+ "grad_norm": 0.0,
663
+ "learning_rate": 6.408328922910905e-05,
664
+ "loss": 0.4085,
665
+ "step": 1780
666
+ },
667
+ {
668
+ "epoch": 0.8563273073263559,
669
+ "grad_norm": 0.0,
670
+ "learning_rate": 6.334716508636796e-05,
671
+ "loss": 0.5145,
672
+ "step": 1800
673
+ },
674
+ {
675
+ "epoch": 0.8658420551855376,
676
+ "grad_norm": 0.0,
677
+ "learning_rate": 6.260791251982354e-05,
678
+ "loss": 0.1931,
679
+ "step": 1820
680
+ },
681
+ {
682
+ "epoch": 0.8753568030447193,
683
+ "grad_norm": 0.0,
684
+ "learning_rate": 6.18657048018757e-05,
685
+ "loss": 0.2708,
686
+ "step": 1840
687
+ },
688
+ {
689
+ "epoch": 0.884871550903901,
690
+ "grad_norm": 0.0,
691
+ "learning_rate": 6.112071589757814e-05,
692
+ "loss": 0.3662,
693
+ "step": 1860
694
+ },
695
+ {
696
+ "epoch": 0.8943862987630827,
697
+ "grad_norm": 0.0,
698
+ "learning_rate": 6.037312042386314e-05,
699
+ "loss": 0.0,
700
+ "step": 1880
701
+ },
702
+ {
703
+ "epoch": 0.9039010466222646,
704
+ "grad_norm": 0.0,
705
+ "learning_rate": 5.962309360861318e-05,
706
+ "loss": 0.0005,
707
+ "step": 1900
708
+ },
709
+ {
710
+ "epoch": 0.9134157944814463,
711
+ "grad_norm": 0.0,
712
+ "learning_rate": 5.887081124958954e-05,
713
+ "loss": 0.1641,
714
+ "step": 1920
715
+ },
716
+ {
717
+ "epoch": 0.922930542340628,
718
+ "grad_norm": 0.0,
719
+ "learning_rate": 5.811644967322737e-05,
720
+ "loss": 0.118,
721
+ "step": 1940
722
+ },
723
+ {
724
+ "epoch": 0.9324452901998097,
725
+ "grad_norm": 0.0,
726
+ "learning_rate": 5.73601856933067e-05,
727
+ "loss": 0.0,
728
+ "step": 1960
729
+ },
730
+ {
731
+ "epoch": 0.9419600380589914,
732
+ "grad_norm": 0.0,
733
+ "learning_rate": 5.660219656950937e-05,
734
+ "loss": 0.0804,
735
+ "step": 1980
736
+ },
737
+ {
738
+ "epoch": 0.9514747859181731,
739
+ "grad_norm": 0.0,
740
+ "learning_rate": 5.584265996587129e-05,
741
+ "loss": 0.3809,
742
+ "step": 2000
743
+ },
744
+ {
745
+ "epoch": 0.9514747859181731,
746
+ "eval_loss": NaN,
747
+ "eval_runtime": 7.6077,
748
+ "eval_samples_per_second": 2.892,
749
+ "eval_steps_per_second": 2.892,
750
+ "step": 2000
751
+ },
752
+ {
753
+ "epoch": 0.9609895337773549,
754
+ "grad_norm": 0.0,
755
+ "learning_rate": 5.5081753909140096e-05,
756
+ "loss": 0.1075,
757
+ "step": 2020
758
+ },
759
+ {
760
+ "epoch": 0.9705042816365367,
761
+ "grad_norm": 0.0,
762
+ "learning_rate": 5.4319656747047654e-05,
763
+ "loss": 0.5366,
764
+ "step": 2040
765
+ },
766
+ {
767
+ "epoch": 0.9800190294957184,
768
+ "grad_norm": 0.0,
769
+ "learning_rate": 5.355654710650737e-05,
770
+ "loss": 0.3959,
771
+ "step": 2060
772
+ },
773
+ {
774
+ "epoch": 0.9895337773549001,
775
+ "grad_norm": 0.0,
776
+ "learning_rate": 5.2792603851746125e-05,
777
+ "loss": 0.1957,
778
+ "step": 2080
779
+ },
780
+ {
781
+ "epoch": 0.9990485252140818,
782
+ "grad_norm": 0.0,
783
+ "learning_rate": 5.2028006042380474e-05,
784
+ "loss": 0.184,
785
+ "step": 2100
786
+ },
787
+ {
788
+ "epoch": 1.0085632730732637,
789
+ "grad_norm": 0.0,
790
+ "learning_rate": 5.126293289144715e-05,
791
+ "loss": 0.1857,
792
+ "step": 2120
793
+ },
794
+ {
795
+ "epoch": 1.0180780209324454,
796
+ "grad_norm": 0.0,
797
+ "learning_rate": 5.0497563723397526e-05,
798
+ "loss": 0.5685,
799
+ "step": 2140
800
+ },
801
+ {
802
+ "epoch": 1.027592768791627,
803
+ "grad_norm": 0.0,
804
+ "learning_rate": 4.973207793206604e-05,
805
+ "loss": 0.0807,
806
+ "step": 2160
807
+ },
808
+ {
809
+ "epoch": 1.0371075166508088,
810
+ "grad_norm": 0.0,
811
+ "learning_rate": 4.8966654938622295e-05,
812
+ "loss": 0.6812,
813
+ "step": 2180
814
+ },
815
+ {
816
+ "epoch": 1.0466222645099905,
817
+ "grad_norm": 0.0,
818
+ "learning_rate": 4.820147414951677e-05,
819
+ "loss": 0.0,
820
+ "step": 2200
821
+ },
822
+ {
823
+ "epoch": 1.0561370123691722,
824
+ "grad_norm": 0.0,
825
+ "learning_rate": 4.743671491443005e-05,
826
+ "loss": 0.0253,
827
+ "step": 2220
828
+ },
829
+ {
830
+ "epoch": 1.065651760228354,
831
+ "grad_norm": 2.1436240673065186,
832
+ "learning_rate": 4.6672556484235174e-05,
833
+ "loss": 0.1353,
834
+ "step": 2240
835
+ },
836
+ {
837
+ "epoch": 1.0751665080875357,
838
+ "grad_norm": 0.0,
839
+ "learning_rate": 4.590917796898342e-05,
840
+ "loss": 0.2796,
841
+ "step": 2260
842
+ },
843
+ {
844
+ "epoch": 1.0846812559467174,
845
+ "grad_norm": 0.0,
846
+ "learning_rate": 4.514675829592278e-05,
847
+ "loss": 0.1627,
848
+ "step": 2280
849
+ },
850
+ {
851
+ "epoch": 1.094196003805899,
852
+ "grad_norm": 0.0,
853
+ "learning_rate": 4.438547616755962e-05,
854
+ "loss": 0.0606,
855
+ "step": 2300
856
+ },
857
+ {
858
+ "epoch": 1.1037107516650808,
859
+ "grad_norm": 0.0,
860
+ "learning_rate": 4.3625510019772715e-05,
861
+ "loss": 0.008,
862
+ "step": 2320
863
+ },
864
+ {
865
+ "epoch": 1.1132254995242625,
866
+ "grad_norm": 4.797143459320068,
867
+ "learning_rate": 4.2867037979989957e-05,
868
+ "loss": 0.1238,
869
+ "step": 2340
870
+ },
871
+ {
872
+ "epoch": 1.1227402473834442,
873
+ "grad_norm": 0.0,
874
+ "learning_rate": 4.211023782543727e-05,
875
+ "loss": 0.1468,
876
+ "step": 2360
877
+ },
878
+ {
879
+ "epoch": 1.1322549952426262,
880
+ "grad_norm": 0.0,
881
+ "learning_rate": 4.135528694146974e-05,
882
+ "loss": 0.4943,
883
+ "step": 2380
884
+ },
885
+ {
886
+ "epoch": 1.141769743101808,
887
+ "grad_norm": 0.0,
888
+ "learning_rate": 4.060236227999441e-05,
889
+ "loss": 0.1699,
890
+ "step": 2400
891
+ },
892
+ {
893
+ "epoch": 1.1512844909609896,
894
+ "grad_norm": 0.0,
895
+ "learning_rate": 3.985164031799481e-05,
896
+ "loss": 0.1025,
897
+ "step": 2420
898
+ },
899
+ {
900
+ "epoch": 1.1607992388201713,
901
+ "grad_norm": 0.0,
902
+ "learning_rate": 3.910329701616673e-05,
903
+ "loss": 0.0501,
904
+ "step": 2440
905
+ },
906
+ {
907
+ "epoch": 1.170313986679353,
908
+ "grad_norm": 0.0,
909
+ "learning_rate": 3.8357507777675056e-05,
910
+ "loss": 0.1253,
911
+ "step": 2460
912
+ },
913
+ {
914
+ "epoch": 1.1798287345385348,
915
+ "grad_norm": 0.0,
916
+ "learning_rate": 3.761444740704129e-05,
917
+ "loss": 0.0181,
918
+ "step": 2480
919
+ },
920
+ {
921
+ "epoch": 1.1893434823977165,
922
+ "grad_norm": 0.0,
923
+ "learning_rate": 3.687429006917128e-05,
924
+ "loss": 0.0237,
925
+ "step": 2500
926
+ },
927
+ {
928
+ "epoch": 1.1893434823977165,
929
+ "eval_loss": NaN,
930
+ "eval_runtime": 7.4307,
931
+ "eval_samples_per_second": 2.961,
932
+ "eval_steps_per_second": 2.961,
933
+ "step": 2500
934
+ },
935
+ {
936
+ "epoch": 1.1988582302568982,
937
+ "grad_norm": 0.0,
938
+ "learning_rate": 3.613720924853299e-05,
939
+ "loss": 0.1829,
940
+ "step": 2520
941
+ },
942
+ {
943
+ "epoch": 1.20837297811608,
944
+ "grad_norm": 0.0,
945
+ "learning_rate": 3.540337770849371e-05,
946
+ "loss": 0.0596,
947
+ "step": 2540
948
+ },
949
+ {
950
+ "epoch": 1.2178877259752616,
951
+ "grad_norm": 0.0,
952
+ "learning_rate": 3.467296745082634e-05,
953
+ "loss": 0.0627,
954
+ "step": 2560
955
+ },
956
+ {
957
+ "epoch": 1.2274024738344433,
958
+ "grad_norm": 0.0,
959
+ "learning_rate": 3.394614967539402e-05,
960
+ "loss": 0.0365,
961
+ "step": 2580
962
+ },
963
+ {
964
+ "epoch": 1.236917221693625,
965
+ "grad_norm": 0.0,
966
+ "learning_rate": 3.322309474002288e-05,
967
+ "loss": 0.0,
968
+ "step": 2600
969
+ },
970
+ {
971
+ "epoch": 1.2464319695528068,
972
+ "grad_norm": 0.0,
973
+ "learning_rate": 3.25039721205721e-05,
974
+ "loss": 0.0425,
975
+ "step": 2620
976
+ },
977
+ {
978
+ "epoch": 1.2559467174119887,
979
+ "grad_norm": 0.0,
980
+ "learning_rate": 3.178895037121077e-05,
981
+ "loss": 0.0774,
982
+ "step": 2640
983
+ },
984
+ {
985
+ "epoch": 1.2654614652711702,
986
+ "grad_norm": 0.0,
987
+ "learning_rate": 3.107819708491059e-05,
988
+ "loss": 0.5383,
989
+ "step": 2660
990
+ },
991
+ {
992
+ "epoch": 1.2749762131303521,
993
+ "grad_norm": 0.0,
994
+ "learning_rate": 3.037187885416398e-05,
995
+ "loss": 0.0006,
996
+ "step": 2680
997
+ },
998
+ {
999
+ "epoch": 1.2844909609895339,
1000
+ "grad_norm": 0.0,
1001
+ "learning_rate": 2.967016123193682e-05,
1002
+ "loss": 0.0,
1003
+ "step": 2700
1004
+ },
1005
+ {
1006
+ "epoch": 1.2940057088487156,
1007
+ "grad_norm": 0.0,
1008
+ "learning_rate": 2.8973208692864624e-05,
1009
+ "loss": 0.0453,
1010
+ "step": 2720
1011
+ },
1012
+ {
1013
+ "epoch": 1.3035204567078973,
1014
+ "grad_norm": 0.0,
1015
+ "learning_rate": 2.828118459470156e-05,
1016
+ "loss": 0.0058,
1017
+ "step": 2740
1018
+ },
1019
+ {
1020
+ "epoch": 1.313035204567079,
1021
+ "grad_norm": 0.0,
1022
+ "learning_rate": 2.7594251140031223e-05,
1023
+ "loss": 0.0,
1024
+ "step": 2760
1025
+ },
1026
+ {
1027
+ "epoch": 1.3225499524262607,
1028
+ "grad_norm": 0.0,
1029
+ "learning_rate": 2.6912569338248315e-05,
1030
+ "loss": 0.0795,
1031
+ "step": 2780
1032
+ },
1033
+ {
1034
+ "epoch": 1.3320647002854424,
1035
+ "grad_norm": 0.0,
1036
+ "learning_rate": 2.623629896781969e-05,
1037
+ "loss": 0.1333,
1038
+ "step": 2800
1039
+ },
1040
+ {
1041
+ "epoch": 1.3415794481446242,
1042
+ "grad_norm": 0.0,
1043
+ "learning_rate": 2.5565598538834286e-05,
1044
+ "loss": 0.0725,
1045
+ "step": 2820
1046
+ },
1047
+ {
1048
+ "epoch": 1.3510941960038059,
1049
+ "grad_norm": 1.8207381963729858,
1050
+ "learning_rate": 2.4900625255849986e-05,
1051
+ "loss": 0.0143,
1052
+ "step": 2840
1053
+ },
1054
+ {
1055
+ "epoch": 1.3606089438629876,
1056
+ "grad_norm": 3.001304864883423,
1057
+ "learning_rate": 2.4241534981046815e-05,
1058
+ "loss": 0.0415,
1059
+ "step": 2860
1060
+ },
1061
+ {
1062
+ "epoch": 1.3701236917221693,
1063
+ "grad_norm": 2.418036699295044,
1064
+ "learning_rate": 2.3588482197694478e-05,
1065
+ "loss": 0.0665,
1066
+ "step": 2880
1067
+ },
1068
+ {
1069
+ "epoch": 1.379638439581351,
1070
+ "grad_norm": 0.0,
1071
+ "learning_rate": 2.294161997394336e-05,
1072
+ "loss": 0.1571,
1073
+ "step": 2900
1074
+ },
1075
+ {
1076
+ "epoch": 1.3891531874405327,
1077
+ "grad_norm": 0.0,
1078
+ "learning_rate": 2.2301099926946968e-05,
1079
+ "loss": 0.12,
1080
+ "step": 2920
1081
+ },
1082
+ {
1083
+ "epoch": 1.3986679352997147,
1084
+ "grad_norm": 0.0,
1085
+ "learning_rate": 2.1667072187324726e-05,
1086
+ "loss": 0.0335,
1087
+ "step": 2940
1088
+ },
1089
+ {
1090
+ "epoch": 1.4081826831588962,
1091
+ "grad_norm": 0.0,
1092
+ "learning_rate": 2.1039685363972934e-05,
1093
+ "loss": 0.0007,
1094
+ "step": 2960
1095
+ },
1096
+ {
1097
+ "epoch": 1.417697431018078,
1098
+ "grad_norm": 0.0,
1099
+ "learning_rate": 2.0419086509232648e-05,
1100
+ "loss": 0.0988,
1101
+ "step": 2980
1102
+ },
1103
+ {
1104
+ "epoch": 1.4272121788772598,
1105
+ "grad_norm": 0.0,
1106
+ "learning_rate": 1.9805421084422167e-05,
1107
+ "loss": 0.0749,
1108
+ "step": 3000
1109
+ },
1110
+ {
1111
+ "epoch": 1.4272121788772598,
1112
+ "eval_loss": NaN,
1113
+ "eval_runtime": 7.6652,
1114
+ "eval_samples_per_second": 2.87,
1115
+ "eval_steps_per_second": 2.87,
1116
+ "step": 3000
1117
+ },
1118
+ {
1119
+ "epoch": 1.4367269267364415,
1120
+ "grad_norm": 0.0,
1121
+ "learning_rate": 1.919883292574269e-05,
1122
+ "loss": 0.1049,
1123
+ "step": 3020
1124
+ },
1125
+ {
1126
+ "epoch": 1.4462416745956232,
1127
+ "grad_norm": 0.0,
1128
+ "learning_rate": 1.8599464210564586e-05,
1129
+ "loss": 0.0917,
1130
+ "step": 3040
1131
+ },
1132
+ {
1133
+ "epoch": 1.455756422454805,
1134
+ "grad_norm": 0.0,
1135
+ "learning_rate": 1.8007455424102797e-05,
1136
+ "loss": 0.3418,
1137
+ "step": 3060
1138
+ },
1139
+ {
1140
+ "epoch": 1.4652711703139867,
1141
+ "grad_norm": 0.0,
1142
+ "learning_rate": 1.7422945326488553e-05,
1143
+ "loss": 0.0139,
1144
+ "step": 3080
1145
+ },
1146
+ {
1147
+ "epoch": 1.4747859181731684,
1148
+ "grad_norm": 0.0,
1149
+ "learning_rate": 1.68460709202457e-05,
1150
+ "loss": 0.0101,
1151
+ "step": 3100
1152
+ },
1153
+ {
1154
+ "epoch": 1.4843006660323501,
1155
+ "grad_norm": 0.0,
1156
+ "learning_rate": 1.6276967418178746e-05,
1157
+ "loss": 0.0394,
1158
+ "step": 3120
1159
+ },
1160
+ {
1161
+ "epoch": 1.4938154138915318,
1162
+ "grad_norm": 0.0,
1163
+ "learning_rate": 1.5715768211680647e-05,
1164
+ "loss": 0.1327,
1165
+ "step": 3140
1166
+ },
1167
+ {
1168
+ "epoch": 1.5033301617507138,
1169
+ "grad_norm": 0.0,
1170
+ "learning_rate": 1.5162604839467265e-05,
1171
+ "loss": 0.0294,
1172
+ "step": 3160
1173
+ },
1174
+ {
1175
+ "epoch": 1.5128449096098953,
1176
+ "grad_norm": 0.0,
1177
+ "learning_rate": 1.4617606956746216e-05,
1178
+ "loss": 0.0,
1179
+ "step": 3180
1180
+ },
1181
+ {
1182
+ "epoch": 1.5223596574690772,
1183
+ "grad_norm": 0.0,
1184
+ "learning_rate": 1.4080902304827242e-05,
1185
+ "loss": 0.0,
1186
+ "step": 3200
1187
+ },
1188
+ {
1189
+ "epoch": 1.5318744053282587,
1190
+ "grad_norm": 0.0,
1191
+ "learning_rate": 1.3552616681181013e-05,
1192
+ "loss": 0.1559,
1193
+ "step": 3220
1194
+ },
1195
+ {
1196
+ "epoch": 1.5413891531874406,
1197
+ "grad_norm": 0.0,
1198
+ "learning_rate": 1.3032873909953636e-05,
1199
+ "loss": 0.0824,
1200
+ "step": 3240
1201
+ },
1202
+ {
1203
+ "epoch": 1.5509039010466221,
1204
+ "grad_norm": 0.0,
1205
+ "learning_rate": 1.2521795812943704e-05,
1206
+ "loss": 0.0029,
1207
+ "step": 3260
1208
+ },
1209
+ {
1210
+ "epoch": 1.560418648905804,
1211
+ "grad_norm": 0.0,
1212
+ "learning_rate": 1.2019502181048676e-05,
1213
+ "loss": 0.0081,
1214
+ "step": 3280
1215
+ },
1216
+ {
1217
+ "epoch": 1.5699333967649858,
1218
+ "grad_norm": 0.0,
1219
+ "learning_rate": 1.1526110746187224e-05,
1220
+ "loss": 0.1394,
1221
+ "step": 3300
1222
+ },
1223
+ {
1224
+ "epoch": 1.5794481446241675,
1225
+ "grad_norm": 0.0,
1226
+ "learning_rate": 1.1041737153704185e-05,
1227
+ "loss": 0.0001,
1228
+ "step": 3320
1229
+ },
1230
+ {
1231
+ "epoch": 1.5889628924833492,
1232
+ "grad_norm": 0.076429083943367,
1233
+ "learning_rate": 1.0566494935264625e-05,
1234
+ "loss": 0.0008,
1235
+ "step": 3340
1236
+ },
1237
+ {
1238
+ "epoch": 1.598477640342531,
1239
+ "grad_norm": 0.0,
1240
+ "learning_rate": 1.0100495482243356e-05,
1241
+ "loss": 0.0104,
1242
+ "step": 3360
1243
+ },
1244
+ {
1245
+ "epoch": 1.6079923882017126,
1246
+ "grad_norm": 0.0,
1247
+ "learning_rate": 9.643848019616003e-06,
1248
+ "loss": 0.0239,
1249
+ "step": 3380
1250
+ },
1251
+ {
1252
+ "epoch": 1.6175071360608944,
1253
+ "grad_norm": 0.0,
1254
+ "learning_rate": 9.196659580357913e-06,
1255
+ "loss": 0.0049,
1256
+ "step": 3400
1257
+ },
1258
+ {
1259
+ "epoch": 1.627021883920076,
1260
+ "grad_norm": 0.039900340139865875,
1261
+ "learning_rate": 8.759034980356862e-06,
1262
+ "loss": 0.0046,
1263
+ "step": 3420
1264
+ },
1265
+ {
1266
+ "epoch": 1.6365366317792578,
1267
+ "grad_norm": 0.0,
1268
+ "learning_rate": 8.33107679384542e-06,
1269
+ "loss": 0.2859,
1270
+ "step": 3440
1271
+ },
1272
+ {
1273
+ "epoch": 1.6460513796384397,
1274
+ "grad_norm": 0.0,
1275
+ "learning_rate": 7.912885329358688e-06,
1276
+ "loss": 0.193,
1277
+ "step": 3460
1278
+ },
1279
+ {
1280
+ "epoch": 1.6555661274976212,
1281
+ "grad_norm": 0.0,
1282
+ "learning_rate": 7.504558606223122e-06,
1283
+ "loss": 0.3163,
1284
+ "step": 3480
1285
+ },
1286
+ {
1287
+ "epoch": 1.6650808753568032,
1288
+ "grad_norm": 0.0,
1289
+ "learning_rate": 7.106192331581896e-06,
1290
+ "loss": 0.0896,
1291
+ "step": 3500
1292
+ },
1293
+ {
1294
+ "epoch": 1.6650808753568032,
1295
+ "eval_loss": NaN,
1296
+ "eval_runtime": 9.6486,
1297
+ "eval_samples_per_second": 2.28,
1298
+ "eval_steps_per_second": 2.28,
1299
+ "step": 3500
1300
+ },
1301
+ {
1302
+ "epoch": 1.6745956232159847,
1303
+ "grad_norm": 4.094954490661621,
1304
+ "learning_rate": 6.717879877962291e-06,
1305
+ "loss": 0.0605,
1306
+ "step": 3520
1307
+ },
1308
+ {
1309
+ "epoch": 1.6841103710751666,
1310
+ "grad_norm": 0.0,
1311
+ "learning_rate": 6.339712261390213e-06,
1312
+ "loss": 0.1816,
1313
+ "step": 3540
1314
+ },
1315
+ {
1316
+ "epoch": 1.693625118934348,
1317
+ "grad_norm": 0.0,
1318
+ "learning_rate": 5.971778120057031e-06,
1319
+ "loss": 0.0005,
1320
+ "step": 3560
1321
+ },
1322
+ {
1323
+ "epoch": 1.70313986679353,
1324
+ "grad_norm": 0.0,
1325
+ "learning_rate": 5.614163693543822e-06,
1326
+ "loss": 0.0328,
1327
+ "step": 3580
1328
+ },
1329
+ {
1330
+ "epoch": 1.7126546146527117,
1331
+ "grad_norm": 7.35809850692749,
1332
+ "learning_rate": 5.266952802607828e-06,
1333
+ "loss": 0.2526,
1334
+ "step": 3600
1335
+ },
1336
+ {
1337
+ "epoch": 1.7221693625118935,
1338
+ "grad_norm": 0.0,
1339
+ "learning_rate": 4.930226829535767e-06,
1340
+ "loss": 0.1301,
1341
+ "step": 3620
1342
+ },
1343
+ {
1344
+ "epoch": 1.7316841103710752,
1345
+ "grad_norm": 0.0,
1346
+ "learning_rate": 4.604064699068766e-06,
1347
+ "loss": 0.0367,
1348
+ "step": 3640
1349
+ },
1350
+ {
1351
+ "epoch": 1.7411988582302569,
1352
+ "grad_norm": 0.0,
1353
+ "learning_rate": 4.288542859903316e-06,
1354
+ "loss": 0.0089,
1355
+ "step": 3660
1356
+ },
1357
+ {
1358
+ "epoch": 1.7507136060894386,
1359
+ "grad_norm": 0.0,
1360
+ "learning_rate": 3.983735266772565e-06,
1361
+ "loss": 0.0477,
1362
+ "step": 3680
1363
+ },
1364
+ {
1365
+ "epoch": 1.7602283539486203,
1366
+ "grad_norm": 0.0,
1367
+ "learning_rate": 3.689713363112146e-06,
1368
+ "loss": 0.0,
1369
+ "step": 3700
1370
+ },
1371
+ {
1372
+ "epoch": 1.7697431018078023,
1373
+ "grad_norm": 0.0,
1374
+ "learning_rate": 3.406546064314664e-06,
1375
+ "loss": 0.0074,
1376
+ "step": 3720
1377
+ },
1378
+ {
1379
+ "epoch": 1.7792578496669837,
1380
+ "grad_norm": 2.901078462600708,
1381
+ "learning_rate": 3.1342997415767015e-06,
1382
+ "loss": 0.0307,
1383
+ "step": 3740
1384
+ },
1385
+ {
1386
+ "epoch": 1.7887725975261657,
1387
+ "grad_norm": 0.0,
1388
+ "learning_rate": 2.873038206342188e-06,
1389
+ "loss": 0.0974,
1390
+ "step": 3760
1391
+ },
1392
+ {
1393
+ "epoch": 1.7982873453853472,
1394
+ "grad_norm": 0.0,
1395
+ "learning_rate": 2.622822695345706e-06,
1396
+ "loss": 0.0024,
1397
+ "step": 3780
1398
+ },
1399
+ {
1400
+ "epoch": 1.8078020932445291,
1401
+ "grad_norm": 0.0,
1402
+ "learning_rate": 2.3837118562592797e-06,
1403
+ "loss": 0.0944,
1404
+ "step": 3800
1405
+ },
1406
+ {
1407
+ "epoch": 1.8173168411037106,
1408
+ "grad_norm": 0.0,
1409
+ "learning_rate": 2.1557617339460432e-06,
1410
+ "loss": 0.0281,
1411
+ "step": 3820
1412
+ },
1413
+ {
1414
+ "epoch": 1.8268315889628925,
1415
+ "grad_norm": 0.0,
1416
+ "learning_rate": 1.939025757323987e-06,
1417
+ "loss": 0.1706,
1418
+ "step": 3840
1419
+ },
1420
+ {
1421
+ "epoch": 1.8363463368220743,
1422
+ "grad_norm": 3.4238052368164062,
1423
+ "learning_rate": 1.7335547268427843e-06,
1424
+ "loss": 0.0456,
1425
+ "step": 3860
1426
+ },
1427
+ {
1428
+ "epoch": 1.845861084681256,
1429
+ "grad_norm": 0.0,
1430
+ "learning_rate": 1.5393968025767702e-06,
1431
+ "loss": 0.2662,
1432
+ "step": 3880
1433
+ },
1434
+ {
1435
+ "epoch": 1.8553758325404377,
1436
+ "grad_norm": 0.0,
1437
+ "learning_rate": 1.3565974929367397e-06,
1438
+ "loss": 0.1209,
1439
+ "step": 3900
1440
+ },
1441
+ {
1442
+ "epoch": 1.8648905803996194,
1443
+ "grad_norm": 0.0,
1444
+ "learning_rate": 1.1851996440033319e-06,
1445
+ "loss": 0.1809,
1446
+ "step": 3920
1447
+ },
1448
+ {
1449
+ "epoch": 1.8744053282588011,
1450
+ "grad_norm": 0.0,
1451
+ "learning_rate": 1.0252434294843737e-06,
1452
+ "loss": 0.1648,
1453
+ "step": 3940
1454
+ },
1455
+ {
1456
+ "epoch": 1.8839200761179828,
1457
+ "grad_norm": 0.0,
1458
+ "learning_rate": 8.767663412986127e-07,
1459
+ "loss": 0.3638,
1460
+ "step": 3960
1461
+ },
1462
+ {
1463
+ "epoch": 1.8934348239771646,
1464
+ "grad_norm": 0.0,
1465
+ "learning_rate": 7.398031807880457e-07,
1466
+ "loss": 0.0223,
1467
+ "step": 3980
1468
+ },
1469
+ {
1470
+ "epoch": 1.9029495718363463,
1471
+ "grad_norm": 0.0,
1472
+ "learning_rate": 6.143860505608945e-07,
1473
+ "loss": 0.0,
1474
+ "step": 4000
1475
+ },
1476
+ {
1477
+ "epoch": 1.9029495718363463,
1478
+ "eval_loss": NaN,
1479
+ "eval_runtime": 8.4613,
1480
+ "eval_samples_per_second": 2.6,
1481
+ "eval_steps_per_second": 2.6,
1482
+ "step": 4000
1483
+ }
1484
+ ],
1485
+ "logging_steps": 20,
1486
+ "max_steps": 4204,
1487
+ "num_input_tokens_seen": 0,
1488
+ "num_train_epochs": 2,
1489
+ "save_steps": 500,
1490
+ "stateful_callbacks": {
1491
+ "TrainerControl": {
1492
+ "args": {
1493
+ "should_epoch_stop": false,
1494
+ "should_evaluate": false,
1495
+ "should_log": false,
1496
+ "should_save": true,
1497
+ "should_training_stop": false
1498
+ },
1499
+ "attributes": {}
1500
+ }
1501
+ },
1502
+ "total_flos": 1.658384411983872e+17,
1503
+ "train_batch_size": 1,
1504
+ "trial_name": null,
1505
+ "trial_params": null
1506
+ }
checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55792dc953d95a676ce206247ddbbaa700c8e5f330681c33f8f8cb36f6666d26
3
+ size 12561
checkpoint-4204/README.md ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: HuggingFaceTB/SmolLM3-3B
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - axolotl
7
+ - base_model:adapter:HuggingFaceTB/SmolLM3-3B
8
+ - lora
9
+ - transformers
10
+ ---
11
+
12
+ # Model Card for the SmolLM3-3B LoRA Adapter
13
+
14
+ <!-- Provide a quick summary of what the model is/does. -->
15
+
16
+
17
+
18
+ ## Model Details
19
+
20
+ ### Model Description
21
+
22
+ <!-- Provide a longer summary of what this model is. -->
23
+
24
+
25
+
26
+ - **Developed by:** [More Information Needed]
27
+ - **Funded by [optional]:** [More Information Needed]
28
+ - **Shared by [optional]:** [More Information Needed]
29
+ - **Model type:** LoRA adapter (PEFT) for causal language modeling / text generation
30
+ - **Language(s) (NLP):** [More Information Needed]
31
+ - **License:** [More Information Needed]
32
+ - **Finetuned from model [optional]:** HuggingFaceTB/SmolLM3-3B
33
+
34
+ ### Model Sources [optional]
35
+
36
+ <!-- Provide the basic links for the model. -->
37
+
38
+ - **Repository:** [More Information Needed]
39
+ - **Paper [optional]:** [More Information Needed]
40
+ - **Demo [optional]:** [More Information Needed]
41
+
42
+ ## Uses
43
+
44
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
45
+
46
+ ### Direct Use
47
+
48
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Downstream Use [optional]
53
+
54
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
55
+
56
+ [More Information Needed]
57
+
58
+ ### Out-of-Scope Use
59
+
60
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ## Bias, Risks, and Limitations
65
+
66
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
67
+
68
+ [More Information Needed]
69
+
70
+ ### Recommendations
71
+
72
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
73
+
74
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
75
+
76
+ ## How to Get Started with the Model
77
+
78
+ Use the code below to get started with the model.
79
+
80
+ [More Information Needed]
81
+
82
+ ## Training Details
83
+
84
+ ### Training Data
85
+
86
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
87
+
88
+ [More Information Needed]
89
+
90
+ ### Training Procedure
91
+
92
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
93
+
94
+ #### Preprocessing [optional]
95
+
96
+ [More Information Needed]
97
+
98
+
99
+ #### Training Hyperparameters
100
+
101
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
102
+
103
+ #### Speeds, Sizes, Times [optional]
104
+
105
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
106
+
107
+ [More Information Needed]
108
+
109
+ ## Evaluation
110
+
111
+ <!-- This section describes the evaluation protocols and provides the results. -->
112
+
113
+ ### Testing Data, Factors & Metrics
114
+
115
+ #### Testing Data
116
+
117
+ <!-- This should link to a Dataset Card if possible. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Factors
122
+
123
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
124
+
125
+ [More Information Needed]
126
+
127
+ #### Metrics
128
+
129
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
130
+
131
+ [More Information Needed]
132
+
133
+ ### Results
134
+
135
+ [More Information Needed]
136
+
137
+ #### Summary
138
+
139
+
140
+
141
+ ## Model Examination [optional]
142
+
143
+ <!-- Relevant interpretability work for the model goes here -->
144
+
145
+ [More Information Needed]
146
+
147
+ ## Environmental Impact
148
+
149
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
150
+
151
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
152
+
153
+ - **Hardware Type:** [More Information Needed]
154
+ - **Hours used:** [More Information Needed]
155
+ - **Cloud Provider:** [More Information Needed]
156
+ - **Compute Region:** [More Information Needed]
157
+ - **Carbon Emitted:** [More Information Needed]
158
+
159
+ ## Technical Specifications [optional]
160
+
161
+ ### Model Architecture and Objective
162
+
163
+ [More Information Needed]
164
+
165
+ ### Compute Infrastructure
166
+
167
+ [More Information Needed]
168
+
169
+ #### Hardware
170
+
171
+ [More Information Needed]
172
+
173
+ #### Software
174
+
175
+ [More Information Needed]
176
+
177
+ ## Citation [optional]
178
+
179
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
180
+
181
+ **BibTeX:**
182
+
183
+ [More Information Needed]
184
+
185
+ **APA:**
186
+
187
+ [More Information Needed]
188
+
189
+ ## Glossary [optional]
190
+
191
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
192
+
193
+ [More Information Needed]
194
+
195
+ ## More Information [optional]
196
+
197
+ [More Information Needed]
198
+
199
+ ## Model Card Authors [optional]
200
+
201
+ [More Information Needed]
202
+
203
+ ## Model Card Contact
204
+
205
+ [More Information Needed]
206
+ ### Framework versions
207
+
208
+ - PEFT 0.16.0
checkpoint-4204/adapter_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceTB/SmolLM3-3B",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "up_proj",
29
+ "gate_proj",
30
+ "k_proj",
31
+ "q_proj",
32
+ "v_proj",
33
+ "down_proj",
34
+ "o_proj"
35
+ ],
36
+ "task_type": "CAUSAL_LM",
37
+ "trainable_token_indices": null,
38
+ "use_dora": false,
39
+ "use_qalora": false,
40
+ "use_rslora": false
41
+ }
checkpoint-4204/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5f9c3bddde813745a86cb601f9d1069128c55abadd89820433b1d99a9baaa6
3
+ size 120981200
checkpoint-4204/chat_template.jinja ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ───── defaults ───── #}
2
+ {%- if enable_thinking is not defined -%}
3
+ {%- set enable_thinking = true -%}
4
+ {%- endif -%}
5
+
6
+ {# ───── reasoning mode ───── #}
7
+ {%- if enable_thinking -%}
8
+ {%- set reasoning_mode = "/think" -%}
9
+ {%- else -%}
10
+ {%- set reasoning_mode = "/no_think" -%}
11
+ {%- endif -%}
12
+
13
+ {# ───── header (system message) ───── #}
14
+ {{- "<|im_start|>system\n" -}}
15
+
16
+ {%- if messages[0].role == "system" -%}
17
+ {%- set system_message = messages[0].content -%}
18
+ {%- if "/no_think" in system_message -%}
19
+ {%- set reasoning_mode = "/no_think" -%}
20
+ {%- elif "/think" in system_message -%}
21
+ {%- set reasoning_mode = "/think" -%}
22
+ {%- endif -%}
23
+ {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
24
+ {%- endif -%}
25
+
26
+ {%- if "/system_override" in system_message -%}
27
+ {{- custom_instructions.replace("/system_override", "").rstrip() -}}
28
+ {{- "<|im_end|>\n" -}}
29
+ {%- else -%}
30
+ {{- "## Metadata\n\n" -}}
31
+ {{- "Knowledge Cutoff Date: June 2025\n" -}}
32
+ {%- set today = strftime_now("%d %B %Y") -%}
33
+ {{- "Today Date: " ~ today ~ "\n" -}}
34
+ {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}
35
+
36
+ {{- "## Custom Instructions\n\n" -}}
37
+ {%- if custom_instructions -%}
38
+ {{- custom_instructions + "\n\n" -}}
39
+ {%- elif reasoning_mode == "/think" -%}
40
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion.\n\n" -}}
41
+ {%- else -%}
42
+ {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
43
+ {%- endif -%}
44
+
45
+ {%- if xml_tools or python_tools or tools -%}
46
+ {{- "### Tools\n\n" -}}
47
+ {%- if xml_tools or tools -%}
48
+ {%- if tools -%}
49
+ {%- set xml_tools = tools -%}
50
+ {%- endif -%}
51
+ {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
52
+ {%- for tool in xml_tools[:] -%} {# The slicing makes sure that xml_tools is a list #}
53
+ {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | string) ~ "\n" -%}
54
+ {%- endfor -%}
55
+ {%- set xml_tool_string = ns.xml_tool_string + "</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" -%}
56
+ {{- xml_tool_string -}}
57
+ {%- endif -%}
58
+ {%- if python_tools -%}
59
+ {%- set ns = namespace(python_tool_string="When you send a message containing Python code between '<code>' and '</code>' tags, it will be executed in a stateful Jupyter notebook environment, and you will then be given the output to continued reasoning in an agentic loop.\n\nYou can use the following tools in your python code like regular functions:\n<tools>\n") -%}
60
+ {%- for tool in python_tools[:] -%} {# The slicing makes sure that python_tools is a list #}
61
+ {%- set ns.python_tool_string = ns.python_tool_string ~ (tool | string) ~ "\n" -%}
62
+ {%- endfor -%}
63
+ {%- set python_tool_string = ns.python_tool_string + "</tools>\n\nThe state persists between code executions: so variables that you define in one step are still available thereafter." -%}
64
+ {{- python_tool_string -}}
65
+ {%- endif -%}
66
+ {{- "\n\n" -}}
67
+ {{- "<|im_end|>\n" -}}
68
+ {%- endif -%}
69
+ {%- endif -%}
70
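+ {# ───── main loop ───── NOTE(review): in the non-override header above, the closing <|im_end|> is emitted only inside the tools branch, so when no tools are passed the system turn is left unterminated. Confirm this matches the format used during training before changing it. #}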
71
+ {%- for message in messages -%}
72
+ {%- set content = message.content if message.content is string else "" -%}
73
+ {%- if message.role == "user" -%}
74
+ {{ "<|im_start|>" + message.role + "\n" + content + "<|im_end|>\n" }}
75
+ {%- elif message.role == "assistant" -%}
76
+ {% generation %}
77
+ {%- if reasoning_mode == "/think" -%}
78
+ {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
79
+ {%- else -%}
80
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
81
+ {%- endif -%}
82
+ {% endgeneration %}
83
+ {%- elif message.role == "tool" -%}
84
+ {{ "<|im_start|>" + "assistant\n" + content + "<|im_end|>\n" }}
85
+ {%- endif -%}
86
+ {%- endfor -%}
87
+ {# ───── generation prompt ───── #}
88
+ {%- if add_generation_prompt -%}
89
+ {%- if reasoning_mode == "/think" -%}
90
+ {{ "<|im_start|>assistant\n" }}
91
+ {%- else -%}
92
+ {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" }}
93
+ {%- endif -%}
94
+ {%- endif -%}
checkpoint-4204/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9059b8b136cbd6ae6580034e6490c52b7122e34fe7182fc0ffdb92fc4ab4b7
3
+ size 242252619
checkpoint-4204/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9ac539d427930fc1a47efc7d56442e14dae771f3d35b6ec7ecb47d519d0146
3
+ size 14645
checkpoint-4204/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cc194d0768057f1849bd6416e50ed448f26b1d9d15cca93ba47764dbb8d8126
3
+ size 1465
checkpoint-4204/special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "<|im_end|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ }
16
+ }
checkpoint-4204/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b6a500b662a34eb3f0374db856ba4ad7de4c81040571d78dc0d357238930005
3
+ size 17208819
checkpoint-4204/tokenizer_config.json ADDED
@@ -0,0 +1,2064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<think>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "128003": {
28
+ "content": "</think>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|im_start|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|im_end|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<tool_response>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "128014": {
116
+ "content": "</tool_response>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "128015": {
124
+ "content": "<tool_call>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "128016": {
132
+ "content": "</tool_call>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "128017": {
140
+ "content": "<code>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "128018": {
148
+ "content": "</code>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": null,
2053
+ "clean_up_tokenization_spaces": true,
2054
+ "eos_token": "<|im_end|>",
2055
+ "extra_special_tokens": {},
2056
+ "fast": false,
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
+ "model_max_length": 131072,
2062
+ "pad_token": "<|im_end|>",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
+ }
checkpoint-4204/trainer_state.json ADDED
@@ -0,0 +1,1576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4204,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0,
14
+ "eval_loss": NaN,
15
+ "eval_runtime": 4.5948,
16
+ "eval_samples_per_second": 4.788,
17
+ "eval_steps_per_second": 4.788,
18
+ "step": 0
19
+ },
20
+ {
21
+ "epoch": 0.009514747859181731,
22
+ "grad_norm": 0.0,
23
+ "learning_rate": 1.9e-05,
24
+ "loss": 2.0934,
25
+ "step": 20
26
+ },
27
+ {
28
+ "epoch": 0.019029495718363463,
29
+ "grad_norm": 0.0,
30
+ "learning_rate": 3.9000000000000006e-05,
31
+ "loss": 0.4738,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.028544243577545196,
36
+ "grad_norm": 0.0,
37
+ "learning_rate": 5.9e-05,
38
+ "loss": 0.8557,
39
+ "step": 60
40
+ },
41
+ {
42
+ "epoch": 0.038058991436726926,
43
+ "grad_norm": 0.0,
44
+ "learning_rate": 7.900000000000001e-05,
45
+ "loss": 0.3978,
46
+ "step": 80
47
+ },
48
+ {
49
+ "epoch": 0.047573739295908656,
50
+ "grad_norm": 0.0,
51
+ "learning_rate": 9.900000000000001e-05,
52
+ "loss": 0.4128,
53
+ "step": 100
54
+ },
55
+ {
56
+ "epoch": 0.05708848715509039,
57
+ "grad_norm": 0.0,
58
+ "learning_rate": 9.999471159635539e-05,
59
+ "loss": 0.148,
60
+ "step": 120
61
+ },
62
+ {
63
+ "epoch": 0.06660323501427212,
64
+ "grad_norm": 0.0,
65
+ "learning_rate": 9.997771965008657e-05,
66
+ "loss": 0.3554,
67
+ "step": 140
68
+ },
69
+ {
70
+ "epoch": 0.07611798287345385,
71
+ "grad_norm": 0.0,
72
+ "learning_rate": 9.994901349433758e-05,
73
+ "loss": 1.0388,
74
+ "step": 160
75
+ },
76
+ {
77
+ "epoch": 0.08563273073263558,
78
+ "grad_norm": 7.0538105964660645,
79
+ "learning_rate": 9.990859985750506e-05,
80
+ "loss": 0.6268,
81
+ "step": 180
82
+ },
83
+ {
84
+ "epoch": 0.09514747859181731,
85
+ "grad_norm": 6.37161111831665,
86
+ "learning_rate": 9.985648821208616e-05,
87
+ "loss": 0.1791,
88
+ "step": 200
89
+ },
90
+ {
91
+ "epoch": 0.10466222645099905,
92
+ "grad_norm": 0.0,
93
+ "learning_rate": 9.979269077245831e-05,
94
+ "loss": 0.0,
95
+ "step": 220
96
+ },
97
+ {
98
+ "epoch": 0.11417697431018078,
99
+ "grad_norm": 0.0,
100
+ "learning_rate": 9.97172224920163e-05,
101
+ "loss": 0.4112,
102
+ "step": 240
103
+ },
104
+ {
105
+ "epoch": 0.12369172216936251,
106
+ "grad_norm": 4.980894565582275,
107
+ "learning_rate": 9.963010105966736e-05,
108
+ "loss": 0.2527,
109
+ "step": 260
110
+ },
111
+ {
112
+ "epoch": 0.13320647002854424,
113
+ "grad_norm": 0.0,
114
+ "learning_rate": 9.953134689568506e-05,
115
+ "loss": 0.639,
116
+ "step": 280
117
+ },
118
+ {
119
+ "epoch": 0.142721217887726,
120
+ "grad_norm": 0.0,
121
+ "learning_rate": 9.94209831469231e-05,
122
+ "loss": 0.5911,
123
+ "step": 300
124
+ },
125
+ {
126
+ "epoch": 0.1522359657469077,
127
+ "grad_norm": 0.0,
128
+ "learning_rate": 9.929903568138989e-05,
129
+ "loss": 0.0966,
130
+ "step": 320
131
+ },
132
+ {
133
+ "epoch": 0.16175071360608945,
134
+ "grad_norm": 0.0,
135
+ "learning_rate": 9.916553308218537e-05,
136
+ "loss": 0.4412,
137
+ "step": 340
138
+ },
139
+ {
140
+ "epoch": 0.17126546146527116,
141
+ "grad_norm": 0.0,
142
+ "learning_rate": 9.902050664080152e-05,
143
+ "loss": 0.3402,
144
+ "step": 360
145
+ },
146
+ {
147
+ "epoch": 0.1807802093244529,
148
+ "grad_norm": 0.0,
149
+ "learning_rate": 9.886399034978797e-05,
150
+ "loss": 0.1149,
151
+ "step": 380
152
+ },
153
+ {
154
+ "epoch": 0.19029495718363462,
155
+ "grad_norm": 0.0,
156
+ "learning_rate": 9.869602089478447e-05,
157
+ "loss": 0.1225,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 0.19980970504281637,
162
+ "grad_norm": 0.0,
163
+ "learning_rate": 9.85166376459223e-05,
164
+ "loss": 0.0718,
165
+ "step": 420
166
+ },
167
+ {
168
+ "epoch": 0.2093244529019981,
169
+ "grad_norm": 0.0,
170
+ "learning_rate": 9.832588264859624e-05,
171
+ "loss": 0.3826,
172
+ "step": 440
173
+ },
174
+ {
175
+ "epoch": 0.21883920076117983,
176
+ "grad_norm": 3.183824300765991,
177
+ "learning_rate": 9.81238006136097e-05,
178
+ "loss": 0.4913,
179
+ "step": 460
180
+ },
181
+ {
182
+ "epoch": 0.22835394862036157,
183
+ "grad_norm": 0.0,
184
+ "learning_rate": 9.7910438906695e-05,
185
+ "loss": 0.0751,
186
+ "step": 480
187
+ },
188
+ {
189
+ "epoch": 0.23786869647954328,
190
+ "grad_norm": 0.0,
191
+ "learning_rate": 9.768584753741134e-05,
192
+ "loss": 0.2444,
193
+ "step": 500
194
+ },
195
+ {
196
+ "epoch": 0.23786869647954328,
197
+ "eval_loss": NaN,
198
+ "eval_runtime": 8.8015,
199
+ "eval_samples_per_second": 2.5,
200
+ "eval_steps_per_second": 2.5,
201
+ "step": 500
202
+ },
203
+ {
204
+ "epoch": 0.24738344433872503,
205
+ "grad_norm": 0.0,
206
+ "learning_rate": 9.745007914742316e-05,
207
+ "loss": 0.1088,
208
+ "step": 520
209
+ },
210
+ {
211
+ "epoch": 0.25689819219790677,
212
+ "grad_norm": 0.0,
213
+ "learning_rate": 9.720318899816155e-05,
214
+ "loss": 0.1922,
215
+ "step": 540
216
+ },
217
+ {
218
+ "epoch": 0.2664129400570885,
219
+ "grad_norm": 0.0,
220
+ "learning_rate": 9.694523495787149e-05,
221
+ "loss": 0.2443,
222
+ "step": 560
223
+ },
224
+ {
225
+ "epoch": 0.2759276879162702,
226
+ "grad_norm": 0.0,
227
+ "learning_rate": 9.667627748804835e-05,
228
+ "loss": 0.0,
229
+ "step": 580
230
+ },
231
+ {
232
+ "epoch": 0.285442435775452,
233
+ "grad_norm": 0.0,
234
+ "learning_rate": 9.639637962926633e-05,
235
+ "loss": 0.6168,
236
+ "step": 600
237
+ },
238
+ {
239
+ "epoch": 0.2949571836346337,
240
+ "grad_norm": 0.0,
241
+ "learning_rate": 9.610560698640241e-05,
242
+ "loss": 0.249,
243
+ "step": 620
244
+ },
245
+ {
246
+ "epoch": 0.3044719314938154,
247
+ "grad_norm": 0.0,
248
+ "learning_rate": 9.580402771325941e-05,
249
+ "loss": 0.1383,
250
+ "step": 640
251
+ },
252
+ {
253
+ "epoch": 0.3139866793529971,
254
+ "grad_norm": 0.0,
255
+ "learning_rate": 9.549171249659145e-05,
256
+ "loss": 0.0658,
257
+ "step": 660
258
+ },
259
+ {
260
+ "epoch": 0.3235014272121789,
261
+ "grad_norm": 0.0,
262
+ "learning_rate": 9.51687345395358e-05,
263
+ "loss": 0.7962,
264
+ "step": 680
265
+ },
266
+ {
267
+ "epoch": 0.3330161750713606,
268
+ "grad_norm": 0.0,
269
+ "learning_rate": 9.483516954445489e-05,
270
+ "loss": 0.1979,
271
+ "step": 700
272
+ },
273
+ {
274
+ "epoch": 0.3425309229305423,
275
+ "grad_norm": 0.0,
276
+ "learning_rate": 9.449109569519253e-05,
277
+ "loss": 0.0846,
278
+ "step": 720
279
+ },
280
+ {
281
+ "epoch": 0.3520456707897241,
282
+ "grad_norm": 0.0,
283
+ "learning_rate": 9.413659363874853e-05,
284
+ "loss": 0.0,
285
+ "step": 740
286
+ },
287
+ {
288
+ "epoch": 0.3615604186489058,
289
+ "grad_norm": 0.0,
290
+ "learning_rate": 9.377174646637597e-05,
291
+ "loss": 0.6378,
292
+ "step": 760
293
+ },
294
+ {
295
+ "epoch": 0.37107516650808753,
296
+ "grad_norm": 0.0,
297
+ "learning_rate": 9.339663969410546e-05,
298
+ "loss": 0.1937,
299
+ "step": 780
300
+ },
301
+ {
302
+ "epoch": 0.38058991436726924,
303
+ "grad_norm": 0.0,
304
+ "learning_rate": 9.301136124270126e-05,
305
+ "loss": 0.1475,
306
+ "step": 800
307
+ },
308
+ {
309
+ "epoch": 0.390104662226451,
310
+ "grad_norm": 0.0,
311
+ "learning_rate": 9.261600141705367e-05,
312
+ "loss": 0.4155,
313
+ "step": 820
314
+ },
315
+ {
316
+ "epoch": 0.39961941008563273,
317
+ "grad_norm": 0.0,
318
+ "learning_rate": 9.221065288501245e-05,
319
+ "loss": 0.4923,
320
+ "step": 840
321
+ },
322
+ {
323
+ "epoch": 0.40913415794481445,
324
+ "grad_norm": 0.6096606254577637,
325
+ "learning_rate": 9.179541065566664e-05,
326
+ "loss": 0.1229,
327
+ "step": 860
328
+ },
329
+ {
330
+ "epoch": 0.4186489058039962,
331
+ "grad_norm": 0.0,
332
+ "learning_rate": 9.137037205707552e-05,
333
+ "loss": 0.0,
334
+ "step": 880
335
+ },
336
+ {
337
+ "epoch": 0.42816365366317793,
338
+ "grad_norm": 0.0,
339
+ "learning_rate": 9.093563671345598e-05,
340
+ "loss": 0.165,
341
+ "step": 900
342
+ },
343
+ {
344
+ "epoch": 0.43767840152235965,
345
+ "grad_norm": 0.0,
346
+ "learning_rate": 9.049130652183166e-05,
347
+ "loss": 0.6362,
348
+ "step": 920
349
+ },
350
+ {
351
+ "epoch": 0.44719314938154137,
352
+ "grad_norm": 0.0,
353
+ "learning_rate": 9.003748562814962e-05,
354
+ "loss": 0.0,
355
+ "step": 940
356
+ },
357
+ {
358
+ "epoch": 0.45670789724072314,
359
+ "grad_norm": 0.0,
360
+ "learning_rate": 8.957428040286959e-05,
361
+ "loss": 0.0507,
362
+ "step": 960
363
+ },
364
+ {
365
+ "epoch": 0.46622264509990485,
366
+ "grad_norm": 0.0,
367
+ "learning_rate": 8.910179941603193e-05,
368
+ "loss": 0.0257,
369
+ "step": 980
370
+ },
371
+ {
372
+ "epoch": 0.47573739295908657,
373
+ "grad_norm": 4.7469587326049805,
374
+ "learning_rate": 8.862015341181018e-05,
375
+ "loss": 0.0655,
376
+ "step": 1000
377
+ },
378
+ {
379
+ "epoch": 0.47573739295908657,
380
+ "eval_loss": NaN,
381
+ "eval_runtime": 7.7821,
382
+ "eval_samples_per_second": 2.827,
383
+ "eval_steps_per_second": 2.827,
384
+ "step": 1000
385
+ },
386
+ {
387
+ "epoch": 0.48525214081826834,
388
+ "grad_norm": 3.7392077445983887,
389
+ "learning_rate": 8.812945528255368e-05,
390
+ "loss": 0.4831,
391
+ "step": 1020
392
+ },
393
+ {
394
+ "epoch": 0.49476688867745006,
395
+ "grad_norm": 0.0,
396
+ "learning_rate": 8.762982004232709e-05,
397
+ "loss": 0.0065,
398
+ "step": 1040
399
+ },
400
+ {
401
+ "epoch": 0.5042816365366318,
402
+ "grad_norm": 0.0,
403
+ "learning_rate": 8.712136479995226e-05,
404
+ "loss": 0.0152,
405
+ "step": 1060
406
+ },
407
+ {
408
+ "epoch": 0.5137963843958135,
409
+ "grad_norm": 0.0,
410
+ "learning_rate": 8.66042087315593e-05,
411
+ "loss": 0.079,
412
+ "step": 1080
413
+ },
414
+ {
415
+ "epoch": 0.5233111322549953,
416
+ "grad_norm": 0.0,
417
+ "learning_rate": 8.60784730526531e-05,
418
+ "loss": 1.2652,
419
+ "step": 1100
420
+ },
421
+ {
422
+ "epoch": 0.532825880114177,
423
+ "grad_norm": 0.0,
424
+ "learning_rate": 8.554428098970171e-05,
425
+ "loss": 0.0,
426
+ "step": 1120
427
+ },
428
+ {
429
+ "epoch": 0.5423406279733587,
430
+ "grad_norm": 0.0,
431
+ "learning_rate": 8.50017577512535e-05,
432
+ "loss": 0.0428,
433
+ "step": 1140
434
+ },
435
+ {
436
+ "epoch": 0.5518553758325404,
437
+ "grad_norm": 2.9529595375061035,
438
+ "learning_rate": 8.445103049858966e-05,
439
+ "loss": 0.3298,
440
+ "step": 1160
441
+ },
442
+ {
443
+ "epoch": 0.5613701236917221,
444
+ "grad_norm": 0.0,
445
+ "learning_rate": 8.389222831591914e-05,
446
+ "loss": 0.0,
447
+ "step": 1180
448
+ },
449
+ {
450
+ "epoch": 0.570884871550904,
451
+ "grad_norm": 0.0,
452
+ "learning_rate": 8.332548218012263e-05,
453
+ "loss": 0.0825,
454
+ "step": 1200
455
+ },
456
+ {
457
+ "epoch": 0.5803996194100857,
458
+ "grad_norm": 0.0,
459
+ "learning_rate": 8.275092493005321e-05,
460
+ "loss": 0.2001,
461
+ "step": 1220
462
+ },
463
+ {
464
+ "epoch": 0.5899143672692674,
465
+ "grad_norm": 0.0,
466
+ "learning_rate": 8.216869123540034e-05,
467
+ "loss": 0.6001,
468
+ "step": 1240
469
+ },
470
+ {
471
+ "epoch": 0.5994291151284491,
472
+ "grad_norm": 1.827052116394043,
473
+ "learning_rate": 8.157891756512488e-05,
474
+ "loss": 0.0177,
475
+ "step": 1260
476
+ },
477
+ {
478
+ "epoch": 0.6089438629876308,
479
+ "grad_norm": 0.0,
480
+ "learning_rate": 8.098174215547224e-05,
481
+ "loss": 0.0,
482
+ "step": 1280
483
+ },
484
+ {
485
+ "epoch": 0.6184586108468125,
486
+ "grad_norm": 0.0,
487
+ "learning_rate": 8.037730497757143e-05,
488
+ "loss": 0.2685,
489
+ "step": 1300
490
+ },
491
+ {
492
+ "epoch": 0.6279733587059942,
493
+ "grad_norm": 0.0,
494
+ "learning_rate": 7.976574770462743e-05,
495
+ "loss": 0.0258,
496
+ "step": 1320
497
+ },
498
+ {
499
+ "epoch": 0.6374881065651761,
500
+ "grad_norm": 0.0,
501
+ "learning_rate": 7.914721367871454e-05,
502
+ "loss": 0.0001,
503
+ "step": 1340
504
+ },
505
+ {
506
+ "epoch": 0.6470028544243578,
507
+ "grad_norm": 0.0,
508
+ "learning_rate": 7.852184787717871e-05,
509
+ "loss": 0.422,
510
+ "step": 1360
511
+ },
512
+ {
513
+ "epoch": 0.6565176022835395,
514
+ "grad_norm": 3.8072144985198975,
515
+ "learning_rate": 7.788979687865639e-05,
516
+ "loss": 0.409,
517
+ "step": 1380
518
+ },
519
+ {
520
+ "epoch": 0.6660323501427212,
521
+ "grad_norm": 0.0,
522
+ "learning_rate": 7.725120882871828e-05,
523
+ "loss": 0.4012,
524
+ "step": 1400
525
+ },
526
+ {
527
+ "epoch": 0.6755470980019029,
528
+ "grad_norm": 0.0,
529
+ "learning_rate": 7.660623340514561e-05,
530
+ "loss": 0.0639,
531
+ "step": 1420
532
+ },
533
+ {
534
+ "epoch": 0.6850618458610847,
535
+ "grad_norm": 0.0,
536
+ "learning_rate": 7.595502178284743e-05,
537
+ "loss": 0.1534,
538
+ "step": 1440
539
+ },
540
+ {
541
+ "epoch": 0.6945765937202664,
542
+ "grad_norm": 0.0,
543
+ "learning_rate": 7.529772659842685e-05,
544
+ "loss": 0.0362,
545
+ "step": 1460
546
+ },
547
+ {
548
+ "epoch": 0.7040913415794482,
549
+ "grad_norm": 0.0,
550
+ "learning_rate": 7.46345019144048e-05,
551
+ "loss": 0.0066,
552
+ "step": 1480
553
+ },
554
+ {
555
+ "epoch": 0.7136060894386299,
556
+ "grad_norm": 0.0,
557
+ "learning_rate": 7.396550318310957e-05,
558
+ "loss": 0.7555,
559
+ "step": 1500
560
+ },
561
+ {
562
+ "epoch": 0.7136060894386299,
563
+ "eval_loss": NaN,
564
+ "eval_runtime": 7.7236,
565
+ "eval_samples_per_second": 2.848,
566
+ "eval_steps_per_second": 2.848,
567
+ "step": 1500
568
+ },
569
+ {
570
+ "epoch": 0.7231208372978116,
571
+ "grad_norm": 1.2502919435501099,
572
+ "learning_rate": 7.32908872102405e-05,
573
+ "loss": 0.1364,
574
+ "step": 1520
575
+ },
576
+ {
577
+ "epoch": 0.7326355851569933,
578
+ "grad_norm": 0.0,
579
+ "learning_rate": 7.261081211811444e-05,
580
+ "loss": 0.199,
581
+ "step": 1540
582
+ },
583
+ {
584
+ "epoch": 0.7421503330161751,
585
+ "grad_norm": 0.0,
586
+ "learning_rate": 7.192543730860388e-05,
587
+ "loss": 0.2509,
588
+ "step": 1560
589
+ },
590
+ {
591
+ "epoch": 0.7516650808753568,
592
+ "grad_norm": 0.0,
593
+ "learning_rate": 7.123492342577472e-05,
594
+ "loss": 0.1916,
595
+ "step": 1580
596
+ },
597
+ {
598
+ "epoch": 0.7611798287345385,
599
+ "grad_norm": 0.0,
600
+ "learning_rate": 7.053943231823328e-05,
601
+ "loss": 0.3898,
602
+ "step": 1600
603
+ },
604
+ {
605
+ "epoch": 0.7706945765937203,
606
+ "grad_norm": 3.7327072620391846,
607
+ "learning_rate": 6.98391270011908e-05,
608
+ "loss": 0.1748,
609
+ "step": 1620
610
+ },
611
+ {
612
+ "epoch": 0.780209324452902,
613
+ "grad_norm": 0.0,
614
+ "learning_rate": 6.91341716182545e-05,
615
+ "loss": 0.1127,
616
+ "step": 1640
617
+ },
618
+ {
619
+ "epoch": 0.7897240723120837,
620
+ "grad_norm": 0.0,
621
+ "learning_rate": 6.842473140295417e-05,
622
+ "loss": 0.0495,
623
+ "step": 1660
624
+ },
625
+ {
626
+ "epoch": 0.7992388201712655,
627
+ "grad_norm": 0.0,
628
+ "learning_rate": 6.77109726400134e-05,
629
+ "loss": 0.1478,
630
+ "step": 1680
631
+ },
632
+ {
633
+ "epoch": 0.8087535680304472,
634
+ "grad_norm": 0.0,
635
+ "learning_rate": 6.69930626263742e-05,
636
+ "loss": 0.5869,
637
+ "step": 1700
638
+ },
639
+ {
640
+ "epoch": 0.8182683158896289,
641
+ "grad_norm": 0.0,
642
+ "learning_rate": 6.627116963198458e-05,
643
+ "loss": 0.1457,
644
+ "step": 1720
645
+ },
646
+ {
647
+ "epoch": 0.8277830637488106,
648
+ "grad_norm": 0.0,
649
+ "learning_rate": 6.554546286035802e-05,
650
+ "loss": 0.671,
651
+ "step": 1740
652
+ },
653
+ {
654
+ "epoch": 0.8372978116079924,
655
+ "grad_norm": 10.052992820739746,
656
+ "learning_rate": 6.481611240891396e-05,
657
+ "loss": 0.5565,
658
+ "step": 1760
659
+ },
660
+ {
661
+ "epoch": 0.8468125594671742,
662
+ "grad_norm": 0.0,
663
+ "learning_rate": 6.408328922910905e-05,
664
+ "loss": 0.4085,
665
+ "step": 1780
666
+ },
667
+ {
668
+ "epoch": 0.8563273073263559,
669
+ "grad_norm": 0.0,
670
+ "learning_rate": 6.334716508636796e-05,
671
+ "loss": 0.5145,
672
+ "step": 1800
673
+ },
674
+ {
675
+ "epoch": 0.8658420551855376,
676
+ "grad_norm": 0.0,
677
+ "learning_rate": 6.260791251982354e-05,
678
+ "loss": 0.1931,
679
+ "step": 1820
680
+ },
681
+ {
682
+ "epoch": 0.8753568030447193,
683
+ "grad_norm": 0.0,
684
+ "learning_rate": 6.18657048018757e-05,
685
+ "loss": 0.2708,
686
+ "step": 1840
687
+ },
688
+ {
689
+ "epoch": 0.884871550903901,
690
+ "grad_norm": 0.0,
691
+ "learning_rate": 6.112071589757814e-05,
692
+ "loss": 0.3662,
693
+ "step": 1860
694
+ },
695
+ {
696
+ "epoch": 0.8943862987630827,
697
+ "grad_norm": 0.0,
698
+ "learning_rate": 6.037312042386314e-05,
699
+ "loss": 0.0,
700
+ "step": 1880
701
+ },
702
+ {
703
+ "epoch": 0.9039010466222646,
704
+ "grad_norm": 0.0,
705
+ "learning_rate": 5.962309360861318e-05,
706
+ "loss": 0.0005,
707
+ "step": 1900
708
+ },
709
+ {
710
+ "epoch": 0.9134157944814463,
711
+ "grad_norm": 0.0,
712
+ "learning_rate": 5.887081124958954e-05,
713
+ "loss": 0.1641,
714
+ "step": 1920
715
+ },
716
+ {
717
+ "epoch": 0.922930542340628,
718
+ "grad_norm": 0.0,
719
+ "learning_rate": 5.811644967322737e-05,
720
+ "loss": 0.118,
721
+ "step": 1940
722
+ },
723
+ {
724
+ "epoch": 0.9324452901998097,
725
+ "grad_norm": 0.0,
726
+ "learning_rate": 5.73601856933067e-05,
727
+ "loss": 0.0,
728
+ "step": 1960
729
+ },
730
+ {
731
+ "epoch": 0.9419600380589914,
732
+ "grad_norm": 0.0,
733
+ "learning_rate": 5.660219656950937e-05,
734
+ "loss": 0.0804,
735
+ "step": 1980
736
+ },
737
+ {
738
+ "epoch": 0.9514747859181731,
739
+ "grad_norm": 0.0,
740
+ "learning_rate": 5.584265996587129e-05,
741
+ "loss": 0.3809,
742
+ "step": 2000
743
+ },
744
+ {
745
+ "epoch": 0.9514747859181731,
746
+ "eval_loss": NaN,
747
+ "eval_runtime": 7.6077,
748
+ "eval_samples_per_second": 2.892,
749
+ "eval_steps_per_second": 2.892,
750
+ "step": 2000
751
+ },
752
+ {
753
+ "epoch": 0.9609895337773549,
754
+ "grad_norm": 0.0,
755
+ "learning_rate": 5.5081753909140096e-05,
756
+ "loss": 0.1075,
757
+ "step": 2020
758
+ },
759
+ {
760
+ "epoch": 0.9705042816365367,
761
+ "grad_norm": 0.0,
762
+ "learning_rate": 5.4319656747047654e-05,
763
+ "loss": 0.5366,
764
+ "step": 2040
765
+ },
766
+ {
767
+ "epoch": 0.9800190294957184,
768
+ "grad_norm": 0.0,
769
+ "learning_rate": 5.355654710650737e-05,
770
+ "loss": 0.3959,
771
+ "step": 2060
772
+ },
773
+ {
774
+ "epoch": 0.9895337773549001,
775
+ "grad_norm": 0.0,
776
+ "learning_rate": 5.2792603851746125e-05,
777
+ "loss": 0.1957,
778
+ "step": 2080
779
+ },
780
+ {
781
+ "epoch": 0.9990485252140818,
782
+ "grad_norm": 0.0,
783
+ "learning_rate": 5.2028006042380474e-05,
784
+ "loss": 0.184,
785
+ "step": 2100
786
+ },
787
+ {
788
+ "epoch": 1.0085632730732637,
789
+ "grad_norm": 0.0,
790
+ "learning_rate": 5.126293289144715e-05,
791
+ "loss": 0.1857,
792
+ "step": 2120
793
+ },
794
+ {
795
+ "epoch": 1.0180780209324454,
796
+ "grad_norm": 0.0,
797
+ "learning_rate": 5.0497563723397526e-05,
798
+ "loss": 0.5685,
799
+ "step": 2140
800
+ },
801
+ {
802
+ "epoch": 1.027592768791627,
803
+ "grad_norm": 0.0,
804
+ "learning_rate": 4.973207793206604e-05,
805
+ "loss": 0.0807,
806
+ "step": 2160
807
+ },
808
+ {
809
+ "epoch": 1.0371075166508088,
810
+ "grad_norm": 0.0,
811
+ "learning_rate": 4.8966654938622295e-05,
812
+ "loss": 0.6812,
813
+ "step": 2180
814
+ },
815
+ {
816
+ "epoch": 1.0466222645099905,
817
+ "grad_norm": 0.0,
818
+ "learning_rate": 4.820147414951677e-05,
819
+ "loss": 0.0,
820
+ "step": 2200
821
+ },
822
+ {
823
+ "epoch": 1.0561370123691722,
824
+ "grad_norm": 0.0,
825
+ "learning_rate": 4.743671491443005e-05,
826
+ "loss": 0.0253,
827
+ "step": 2220
828
+ },
829
+ {
830
+ "epoch": 1.065651760228354,
831
+ "grad_norm": 2.1436240673065186,
832
+ "learning_rate": 4.6672556484235174e-05,
833
+ "loss": 0.1353,
834
+ "step": 2240
835
+ },
836
+ {
837
+ "epoch": 1.0751665080875357,
838
+ "grad_norm": 0.0,
839
+ "learning_rate": 4.590917796898342e-05,
840
+ "loss": 0.2796,
841
+ "step": 2260
842
+ },
843
+ {
844
+ "epoch": 1.0846812559467174,
845
+ "grad_norm": 0.0,
846
+ "learning_rate": 4.514675829592278e-05,
847
+ "loss": 0.1627,
848
+ "step": 2280
849
+ },
850
+ {
851
+ "epoch": 1.094196003805899,
852
+ "grad_norm": 0.0,
853
+ "learning_rate": 4.438547616755962e-05,
854
+ "loss": 0.0606,
855
+ "step": 2300
856
+ },
857
+ {
858
+ "epoch": 1.1037107516650808,
859
+ "grad_norm": 0.0,
860
+ "learning_rate": 4.3625510019772715e-05,
861
+ "loss": 0.008,
862
+ "step": 2320
863
+ },
864
+ {
865
+ "epoch": 1.1132254995242625,
866
+ "grad_norm": 4.797143459320068,
867
+ "learning_rate": 4.2867037979989957e-05,
868
+ "loss": 0.1238,
869
+ "step": 2340
870
+ },
871
+ {
872
+ "epoch": 1.1227402473834442,
873
+ "grad_norm": 0.0,
874
+ "learning_rate": 4.211023782543727e-05,
875
+ "loss": 0.1468,
876
+ "step": 2360
877
+ },
878
+ {
879
+ "epoch": 1.1322549952426262,
880
+ "grad_norm": 0.0,
881
+ "learning_rate": 4.135528694146974e-05,
882
+ "loss": 0.4943,
883
+ "step": 2380
884
+ },
885
+ {
886
+ "epoch": 1.141769743101808,
887
+ "grad_norm": 0.0,
888
+ "learning_rate": 4.060236227999441e-05,
889
+ "loss": 0.1699,
890
+ "step": 2400
891
+ },
892
+ {
893
+ "epoch": 1.1512844909609896,
894
+ "grad_norm": 0.0,
895
+ "learning_rate": 3.985164031799481e-05,
896
+ "loss": 0.1025,
897
+ "step": 2420
898
+ },
899
+ {
900
+ "epoch": 1.1607992388201713,
901
+ "grad_norm": 0.0,
902
+ "learning_rate": 3.910329701616673e-05,
903
+ "loss": 0.0501,
904
+ "step": 2440
905
+ },
906
+ {
907
+ "epoch": 1.170313986679353,
908
+ "grad_norm": 0.0,
909
+ "learning_rate": 3.8357507777675056e-05,
910
+ "loss": 0.1253,
911
+ "step": 2460
912
+ },
913
+ {
914
+ "epoch": 1.1798287345385348,
915
+ "grad_norm": 0.0,
916
+ "learning_rate": 3.761444740704129e-05,
917
+ "loss": 0.0181,
918
+ "step": 2480
919
+ },
920
+ {
921
+ "epoch": 1.1893434823977165,
922
+ "grad_norm": 0.0,
923
+ "learning_rate": 3.687429006917128e-05,
924
+ "loss": 0.0237,
925
+ "step": 2500
926
+ },
927
+ {
928
+ "epoch": 1.1893434823977165,
929
+ "eval_loss": NaN,
930
+ "eval_runtime": 7.4307,
931
+ "eval_samples_per_second": 2.961,
932
+ "eval_steps_per_second": 2.961,
933
+ "step": 2500
934
+ },
935
+ {
936
+ "epoch": 1.1988582302568982,
937
+ "grad_norm": 0.0,
938
+ "learning_rate": 3.613720924853299e-05,
939
+ "loss": 0.1829,
940
+ "step": 2520
941
+ },
942
+ {
943
+ "epoch": 1.20837297811608,
944
+ "grad_norm": 0.0,
945
+ "learning_rate": 3.540337770849371e-05,
946
+ "loss": 0.0596,
947
+ "step": 2540
948
+ },
949
+ {
950
+ "epoch": 1.2178877259752616,
951
+ "grad_norm": 0.0,
952
+ "learning_rate": 3.467296745082634e-05,
953
+ "loss": 0.0627,
954
+ "step": 2560
955
+ },
956
+ {
957
+ "epoch": 1.2274024738344433,
958
+ "grad_norm": 0.0,
959
+ "learning_rate": 3.394614967539402e-05,
960
+ "loss": 0.0365,
961
+ "step": 2580
962
+ },
963
+ {
964
+ "epoch": 1.236917221693625,
965
+ "grad_norm": 0.0,
966
+ "learning_rate": 3.322309474002288e-05,
967
+ "loss": 0.0,
968
+ "step": 2600
969
+ },
970
+ {
971
+ "epoch": 1.2464319695528068,
972
+ "grad_norm": 0.0,
973
+ "learning_rate": 3.25039721205721e-05,
974
+ "loss": 0.0425,
975
+ "step": 2620
976
+ },
977
+ {
978
+ "epoch": 1.2559467174119887,
979
+ "grad_norm": 0.0,
980
+ "learning_rate": 3.178895037121077e-05,
981
+ "loss": 0.0774,
982
+ "step": 2640
983
+ },
984
+ {
985
+ "epoch": 1.2654614652711702,
986
+ "grad_norm": 0.0,
987
+ "learning_rate": 3.107819708491059e-05,
988
+ "loss": 0.5383,
989
+ "step": 2660
990
+ },
991
+ {
992
+ "epoch": 1.2749762131303521,
993
+ "grad_norm": 0.0,
994
+ "learning_rate": 3.037187885416398e-05,
995
+ "loss": 0.0006,
996
+ "step": 2680
997
+ },
998
+ {
999
+ "epoch": 1.2844909609895339,
1000
+ "grad_norm": 0.0,
1001
+ "learning_rate": 2.967016123193682e-05,
1002
+ "loss": 0.0,
1003
+ "step": 2700
1004
+ },
1005
+ {
1006
+ "epoch": 1.2940057088487156,
1007
+ "grad_norm": 0.0,
1008
+ "learning_rate": 2.8973208692864624e-05,
1009
+ "loss": 0.0453,
1010
+ "step": 2720
1011
+ },
1012
+ {
1013
+ "epoch": 1.3035204567078973,
1014
+ "grad_norm": 0.0,
1015
+ "learning_rate": 2.828118459470156e-05,
1016
+ "loss": 0.0058,
1017
+ "step": 2740
1018
+ },
1019
+ {
1020
+ "epoch": 1.313035204567079,
1021
+ "grad_norm": 0.0,
1022
+ "learning_rate": 2.7594251140031223e-05,
1023
+ "loss": 0.0,
1024
+ "step": 2760
1025
+ },
1026
+ {
1027
+ "epoch": 1.3225499524262607,
1028
+ "grad_norm": 0.0,
1029
+ "learning_rate": 2.6912569338248315e-05,
1030
+ "loss": 0.0795,
1031
+ "step": 2780
1032
+ },
1033
+ {
1034
+ "epoch": 1.3320647002854424,
1035
+ "grad_norm": 0.0,
1036
+ "learning_rate": 2.623629896781969e-05,
1037
+ "loss": 0.1333,
1038
+ "step": 2800
1039
+ },
1040
+ {
1041
+ "epoch": 1.3415794481446242,
1042
+ "grad_norm": 0.0,
1043
+ "learning_rate": 2.5565598538834286e-05,
1044
+ "loss": 0.0725,
1045
+ "step": 2820
1046
+ },
1047
+ {
1048
+ "epoch": 1.3510941960038059,
1049
+ "grad_norm": 1.8207381963729858,
1050
+ "learning_rate": 2.4900625255849986e-05,
1051
+ "loss": 0.0143,
1052
+ "step": 2840
1053
+ },
1054
+ {
1055
+ "epoch": 1.3606089438629876,
1056
+ "grad_norm": 3.001304864883423,
1057
+ "learning_rate": 2.4241534981046815e-05,
1058
+ "loss": 0.0415,
1059
+ "step": 2860
1060
+ },
1061
+ {
1062
+ "epoch": 1.3701236917221693,
1063
+ "grad_norm": 2.418036699295044,
1064
+ "learning_rate": 2.3588482197694478e-05,
1065
+ "loss": 0.0665,
1066
+ "step": 2880
1067
+ },
1068
+ {
1069
+ "epoch": 1.379638439581351,
1070
+ "grad_norm": 0.0,
1071
+ "learning_rate": 2.294161997394336e-05,
1072
+ "loss": 0.1571,
1073
+ "step": 2900
1074
+ },
1075
+ {
1076
+ "epoch": 1.3891531874405327,
1077
+ "grad_norm": 0.0,
1078
+ "learning_rate": 2.2301099926946968e-05,
1079
+ "loss": 0.12,
1080
+ "step": 2920
1081
+ },
1082
+ {
1083
+ "epoch": 1.3986679352997147,
1084
+ "grad_norm": 0.0,
1085
+ "learning_rate": 2.1667072187324726e-05,
1086
+ "loss": 0.0335,
1087
+ "step": 2940
1088
+ },
1089
+ {
1090
+ "epoch": 1.4081826831588962,
1091
+ "grad_norm": 0.0,
1092
+ "learning_rate": 2.1039685363972934e-05,
1093
+ "loss": 0.0007,
1094
+ "step": 2960
1095
+ },
1096
+ {
1097
+ "epoch": 1.417697431018078,
1098
+ "grad_norm": 0.0,
1099
+ "learning_rate": 2.0419086509232648e-05,
1100
+ "loss": 0.0988,
1101
+ "step": 2980
1102
+ },
1103
+ {
1104
+ "epoch": 1.4272121788772598,
1105
+ "grad_norm": 0.0,
1106
+ "learning_rate": 1.9805421084422167e-05,
1107
+ "loss": 0.0749,
1108
+ "step": 3000
1109
+ },
1110
+ {
1111
+ "epoch": 1.4272121788772598,
1112
+ "eval_loss": NaN,
1113
+ "eval_runtime": 7.6652,
1114
+ "eval_samples_per_second": 2.87,
1115
+ "eval_steps_per_second": 2.87,
1116
+ "step": 3000
1117
+ },
1118
+ {
1119
+ "epoch": 1.4367269267364415,
1120
+ "grad_norm": 0.0,
1121
+ "learning_rate": 1.919883292574269e-05,
1122
+ "loss": 0.1049,
1123
+ "step": 3020
1124
+ },
1125
+ {
1126
+ "epoch": 1.4462416745956232,
1127
+ "grad_norm": 0.0,
1128
+ "learning_rate": 1.8599464210564586e-05,
1129
+ "loss": 0.0917,
1130
+ "step": 3040
1131
+ },
1132
+ {
1133
+ "epoch": 1.455756422454805,
1134
+ "grad_norm": 0.0,
1135
+ "learning_rate": 1.8007455424102797e-05,
1136
+ "loss": 0.3418,
1137
+ "step": 3060
1138
+ },
1139
+ {
1140
+ "epoch": 1.4652711703139867,
1141
+ "grad_norm": 0.0,
1142
+ "learning_rate": 1.7422945326488553e-05,
1143
+ "loss": 0.0139,
1144
+ "step": 3080
1145
+ },
1146
+ {
1147
+ "epoch": 1.4747859181731684,
1148
+ "grad_norm": 0.0,
1149
+ "learning_rate": 1.68460709202457e-05,
1150
+ "loss": 0.0101,
1151
+ "step": 3100
1152
+ },
1153
+ {
1154
+ "epoch": 1.4843006660323501,
1155
+ "grad_norm": 0.0,
1156
+ "learning_rate": 1.6276967418178746e-05,
1157
+ "loss": 0.0394,
1158
+ "step": 3120
1159
+ },
1160
+ {
1161
+ "epoch": 1.4938154138915318,
1162
+ "grad_norm": 0.0,
1163
+ "learning_rate": 1.5715768211680647e-05,
1164
+ "loss": 0.1327,
1165
+ "step": 3140
1166
+ },
1167
+ {
1168
+ "epoch": 1.5033301617507138,
1169
+ "grad_norm": 0.0,
1170
+ "learning_rate": 1.5162604839467265e-05,
1171
+ "loss": 0.0294,
1172
+ "step": 3160
1173
+ },
1174
+ {
1175
+ "epoch": 1.5128449096098953,
1176
+ "grad_norm": 0.0,
1177
+ "learning_rate": 1.4617606956746216e-05,
1178
+ "loss": 0.0,
1179
+ "step": 3180
1180
+ },
1181
+ {
1182
+ "epoch": 1.5223596574690772,
1183
+ "grad_norm": 0.0,
1184
+ "learning_rate": 1.4080902304827242e-05,
1185
+ "loss": 0.0,
1186
+ "step": 3200
1187
+ },
1188
+ {
1189
+ "epoch": 1.5318744053282587,
1190
+ "grad_norm": 0.0,
1191
+ "learning_rate": 1.3552616681181013e-05,
1192
+ "loss": 0.1559,
1193
+ "step": 3220
1194
+ },
1195
+ {
1196
+ "epoch": 1.5413891531874406,
1197
+ "grad_norm": 0.0,
1198
+ "learning_rate": 1.3032873909953636e-05,
1199
+ "loss": 0.0824,
1200
+ "step": 3240
1201
+ },
1202
+ {
1203
+ "epoch": 1.5509039010466221,
1204
+ "grad_norm": 0.0,
1205
+ "learning_rate": 1.2521795812943704e-05,
1206
+ "loss": 0.0029,
1207
+ "step": 3260
1208
+ },
1209
+ {
1210
+ "epoch": 1.560418648905804,
1211
+ "grad_norm": 0.0,
1212
+ "learning_rate": 1.2019502181048676e-05,
1213
+ "loss": 0.0081,
1214
+ "step": 3280
1215
+ },
1216
+ {
1217
+ "epoch": 1.5699333967649858,
1218
+ "grad_norm": 0.0,
1219
+ "learning_rate": 1.1526110746187224e-05,
1220
+ "loss": 0.1394,
1221
+ "step": 3300
1222
+ },
1223
+ {
1224
+ "epoch": 1.5794481446241675,
1225
+ "grad_norm": 0.0,
1226
+ "learning_rate": 1.1041737153704185e-05,
1227
+ "loss": 0.0001,
1228
+ "step": 3320
1229
+ },
1230
+ {
1231
+ "epoch": 1.5889628924833492,
1232
+ "grad_norm": 0.076429083943367,
1233
+ "learning_rate": 1.0566494935264625e-05,
1234
+ "loss": 0.0008,
1235
+ "step": 3340
1236
+ },
1237
+ {
1238
+ "epoch": 1.598477640342531,
1239
+ "grad_norm": 0.0,
1240
+ "learning_rate": 1.0100495482243356e-05,
1241
+ "loss": 0.0104,
1242
+ "step": 3360
1243
+ },
1244
+ {
1245
+ "epoch": 1.6079923882017126,
1246
+ "grad_norm": 0.0,
1247
+ "learning_rate": 9.643848019616003e-06,
1248
+ "loss": 0.0239,
1249
+ "step": 3380
1250
+ },
1251
+ {
1252
+ "epoch": 1.6175071360608944,
1253
+ "grad_norm": 0.0,
1254
+ "learning_rate": 9.196659580357913e-06,
1255
+ "loss": 0.0049,
1256
+ "step": 3400
1257
+ },
1258
+ {
1259
+ "epoch": 1.627021883920076,
1260
+ "grad_norm": 0.039900340139865875,
1261
+ "learning_rate": 8.759034980356862e-06,
1262
+ "loss": 0.0046,
1263
+ "step": 3420
1264
+ },
1265
+ {
1266
+ "epoch": 1.6365366317792578,
1267
+ "grad_norm": 0.0,
1268
+ "learning_rate": 8.33107679384542e-06,
1269
+ "loss": 0.2859,
1270
+ "step": 3440
1271
+ },
1272
+ {
1273
+ "epoch": 1.6460513796384397,
1274
+ "grad_norm": 0.0,
1275
+ "learning_rate": 7.912885329358688e-06,
1276
+ "loss": 0.193,
1277
+ "step": 3460
1278
+ },
1279
+ {
1280
+ "epoch": 1.6555661274976212,
1281
+ "grad_norm": 0.0,
1282
+ "learning_rate": 7.504558606223122e-06,
1283
+ "loss": 0.3163,
1284
+ "step": 3480
1285
+ },
1286
+ {
1287
+ "epoch": 1.6650808753568032,
1288
+ "grad_norm": 0.0,
1289
+ "learning_rate": 7.106192331581896e-06,
1290
+ "loss": 0.0896,
1291
+ "step": 3500
1292
+ },
1293
+ {
1294
+ "epoch": 1.6650808753568032,
1295
+ "eval_loss": NaN,
1296
+ "eval_runtime": 9.6486,
1297
+ "eval_samples_per_second": 2.28,
1298
+ "eval_steps_per_second": 2.28,
1299
+ "step": 3500
1300
+ },
1301
+ {
1302
+ "epoch": 1.6745956232159847,
1303
+ "grad_norm": 4.094954490661621,
1304
+ "learning_rate": 6.717879877962291e-06,
1305
+ "loss": 0.0605,
1306
+ "step": 3520
1307
+ },
1308
+ {
1309
+ "epoch": 1.6841103710751666,
1310
+ "grad_norm": 0.0,
1311
+ "learning_rate": 6.339712261390213e-06,
1312
+ "loss": 0.1816,
1313
+ "step": 3540
1314
+ },
1315
+ {
1316
+ "epoch": 1.693625118934348,
1317
+ "grad_norm": 0.0,
1318
+ "learning_rate": 5.971778120057031e-06,
1319
+ "loss": 0.0005,
1320
+ "step": 3560
1321
+ },
1322
+ {
1323
+ "epoch": 1.70313986679353,
1324
+ "grad_norm": 0.0,
1325
+ "learning_rate": 5.614163693543822e-06,
1326
+ "loss": 0.0328,
1327
+ "step": 3580
1328
+ },
1329
+ {
1330
+ "epoch": 1.7126546146527117,
1331
+ "grad_norm": 7.35809850692749,
1332
+ "learning_rate": 5.266952802607828e-06,
1333
+ "loss": 0.2526,
1334
+ "step": 3600
1335
+ },
1336
+ {
1337
+ "epoch": 1.7221693625118935,
1338
+ "grad_norm": 0.0,
1339
+ "learning_rate": 4.930226829535767e-06,
1340
+ "loss": 0.1301,
1341
+ "step": 3620
1342
+ },
1343
+ {
1344
+ "epoch": 1.7316841103710752,
1345
+ "grad_norm": 0.0,
1346
+ "learning_rate": 4.604064699068766e-06,
1347
+ "loss": 0.0367,
1348
+ "step": 3640
1349
+ },
1350
+ {
1351
+ "epoch": 1.7411988582302569,
1352
+ "grad_norm": 0.0,
1353
+ "learning_rate": 4.288542859903316e-06,
1354
+ "loss": 0.0089,
1355
+ "step": 3660
1356
+ },
1357
+ {
1358
+ "epoch": 1.7507136060894386,
1359
+ "grad_norm": 0.0,
1360
+ "learning_rate": 3.983735266772565e-06,
1361
+ "loss": 0.0477,
1362
+ "step": 3680
1363
+ },
1364
+ {
1365
+ "epoch": 1.7602283539486203,
1366
+ "grad_norm": 0.0,
1367
+ "learning_rate": 3.689713363112146e-06,
1368
+ "loss": 0.0,
1369
+ "step": 3700
1370
+ },
1371
+ {
1372
+ "epoch": 1.7697431018078023,
1373
+ "grad_norm": 0.0,
1374
+ "learning_rate": 3.406546064314664e-06,
1375
+ "loss": 0.0074,
1376
+ "step": 3720
1377
+ },
1378
+ {
1379
+ "epoch": 1.7792578496669837,
1380
+ "grad_norm": 2.901078462600708,
1381
+ "learning_rate": 3.1342997415767015e-06,
1382
+ "loss": 0.0307,
1383
+ "step": 3740
1384
+ },
1385
+ {
1386
+ "epoch": 1.7887725975261657,
1387
+ "grad_norm": 0.0,
1388
+ "learning_rate": 2.873038206342188e-06,
1389
+ "loss": 0.0974,
1390
+ "step": 3760
1391
+ },
1392
+ {
1393
+ "epoch": 1.7982873453853472,
1394
+ "grad_norm": 0.0,
1395
+ "learning_rate": 2.622822695345706e-06,
1396
+ "loss": 0.0024,
1397
+ "step": 3780
1398
+ },
1399
+ {
1400
+ "epoch": 1.8078020932445291,
1401
+ "grad_norm": 0.0,
1402
+ "learning_rate": 2.3837118562592797e-06,
1403
+ "loss": 0.0944,
1404
+ "step": 3800
1405
+ },
1406
+ {
1407
+ "epoch": 1.8173168411037106,
1408
+ "grad_norm": 0.0,
1409
+ "learning_rate": 2.1557617339460432e-06,
1410
+ "loss": 0.0281,
1411
+ "step": 3820
1412
+ },
1413
+ {
1414
+ "epoch": 1.8268315889628925,
1415
+ "grad_norm": 0.0,
1416
+ "learning_rate": 1.939025757323987e-06,
1417
+ "loss": 0.1706,
1418
+ "step": 3840
1419
+ },
1420
+ {
1421
+ "epoch": 1.8363463368220743,
1422
+ "grad_norm": 3.4238052368164062,
1423
+ "learning_rate": 1.7335547268427843e-06,
1424
+ "loss": 0.0456,
1425
+ "step": 3860
1426
+ },
1427
+ {
1428
+ "epoch": 1.845861084681256,
1429
+ "grad_norm": 0.0,
1430
+ "learning_rate": 1.5393968025767702e-06,
1431
+ "loss": 0.2662,
1432
+ "step": 3880
1433
+ },
1434
+ {
1435
+ "epoch": 1.8553758325404377,
1436
+ "grad_norm": 0.0,
1437
+ "learning_rate": 1.3565974929367397e-06,
1438
+ "loss": 0.1209,
1439
+ "step": 3900
1440
+ },
1441
+ {
1442
+ "epoch": 1.8648905803996194,
1443
+ "grad_norm": 0.0,
1444
+ "learning_rate": 1.1851996440033319e-06,
1445
+ "loss": 0.1809,
1446
+ "step": 3920
1447
+ },
1448
+ {
1449
+ "epoch": 1.8744053282588011,
1450
+ "grad_norm": 0.0,
1451
+ "learning_rate": 1.0252434294843737e-06,
1452
+ "loss": 0.1648,
1453
+ "step": 3940
1454
+ },
1455
+ {
1456
+ "epoch": 1.8839200761179828,
1457
+ "grad_norm": 0.0,
1458
+ "learning_rate": 8.767663412986127e-07,
1459
+ "loss": 0.3638,
1460
+ "step": 3960
1461
+ },
1462
+ {
1463
+ "epoch": 1.8934348239771646,
1464
+ "grad_norm": 0.0,
1465
+ "learning_rate": 7.398031807880457e-07,
1466
+ "loss": 0.0223,
1467
+ "step": 3980
1468
+ },
1469
+ {
1470
+ "epoch": 1.9029495718363463,
1471
+ "grad_norm": 0.0,
1472
+ "learning_rate": 6.143860505608945e-07,
1473
+ "loss": 0.0,
1474
+ "step": 4000
1475
+ },
1476
+ {
1477
+ "epoch": 1.9029495718363463,
1478
+ "eval_loss": NaN,
1479
+ "eval_runtime": 8.4613,
1480
+ "eval_samples_per_second": 2.6,
1481
+ "eval_steps_per_second": 2.6,
1482
+ "step": 4000
1483
+ },
1484
+ {
1485
+ "epoch": 1.9124643196955282,
1486
+ "grad_norm": 0.0,
1487
+ "learning_rate": 5.005443469670967e-07,
1488
+ "loss": 0.0384,
1489
+ "step": 4020
1490
+ },
1491
+ {
1492
+ "epoch": 1.9219790675547097,
1493
+ "grad_norm": 0.0,
1494
+ "learning_rate": 3.983047532081341e-07,
1495
+ "loss": 0.0489,
1496
+ "step": 4040
1497
+ },
1498
+ {
1499
+ "epoch": 1.9314938154138916,
1500
+ "grad_norm": 0.0,
1501
+ "learning_rate": 3.0769123308281854e-07,
1502
+ "loss": 0.0271,
1503
+ "step": 4060
1504
+ },
1505
+ {
1506
+ "epoch": 1.9410085632730731,
1507
+ "grad_norm": 1.605538010597229,
1508
+ "learning_rate": 2.2872502537042984e-07,
1509
+ "loss": 0.2483,
1510
+ "step": 4080
1511
+ },
1512
+ {
1513
+ "epoch": 1.950523311132255,
1514
+ "grad_norm": 0.0,
1515
+ "learning_rate": 1.614246388525864e-07,
1516
+ "loss": 0.0214,
1517
+ "step": 4100
1518
+ },
1519
+ {
1520
+ "epoch": 1.9600380589914366,
1521
+ "grad_norm": 0.0,
1522
+ "learning_rate": 1.0580584797501014e-07,
1523
+ "loss": 0.3865,
1524
+ "step": 4120
1525
+ },
1526
+ {
1527
+ "epoch": 1.9695528068506185,
1528
+ "grad_norm": 0.0,
1529
+ "learning_rate": 6.188168915016168e-08,
1530
+ "loss": 0.0641,
1531
+ "step": 4140
1532
+ },
1533
+ {
1534
+ "epoch": 1.9790675547098002,
1535
+ "grad_norm": 1.9755995273590088,
1536
+ "learning_rate": 2.9662457701662428e-08,
1537
+ "loss": 0.0983,
1538
+ "step": 4160
1539
+ },
1540
+ {
1541
+ "epoch": 1.988582302568982,
1542
+ "grad_norm": 0.0,
1543
+ "learning_rate": 9.155705451191488e-09,
1544
+ "loss": 0.0,
1545
+ "step": 4180
1546
+ },
1547
+ {
1548
+ "epoch": 1.9980970504281637,
1549
+ "grad_norm": 2.224816083908081,
1550
+ "learning_rate": 3.6623894841270047e-10,
1551
+ "loss": 0.1236,
1552
+ "step": 4200
1553
+ }
1554
+ ],
1555
+ "logging_steps": 20,
1556
+ "max_steps": 4204,
1557
+ "num_input_tokens_seen": 0,
1558
+ "num_train_epochs": 2,
1559
+ "save_steps": 500,
1560
+ "stateful_callbacks": {
1561
+ "TrainerControl": {
1562
+ "args": {
1563
+ "should_epoch_stop": false,
1564
+ "should_evaluate": false,
1565
+ "should_log": false,
1566
+ "should_save": true,
1567
+ "should_training_stop": true
1568
+ },
1569
+ "attributes": {}
1570
+ }
1571
+ },
1572
+ "total_flos": 1.7427745379608166e+17,
1573
+ "train_batch_size": 1,
1574
+ "trial_name": null,
1575
+ "trial_params": null
1576
+ }
checkpoint-4204/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55792dc953d95a676ce206247ddbbaa700c8e5f330681c33f8f8cb36f6666d26
3
+ size 12561
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SmolLM3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "eos_token_id": 128012,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 2048,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 11008,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention"
50
+ ],
51
+ "max_position_embeddings": 65536,
52
+ "max_window_layers": 28,
53
+ "mlp_bias": false,
54
+ "model_type": "smollm3",
55
+ "no_rope_layer_interval": 4,
56
+ "no_rope_layers": [
57
+ 1,
58
+ 1,
59
+ 1,
60
+ 0,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 0,
65
+ 1,
66
+ 1,
67
+ 1,
68
+ 0,
69
+ 1,
70
+ 1,
71
+ 1,
72
+ 0,
73
+ 1,
74
+ 1,
75
+ 1,
76
+ 0,
77
+ 1,
78
+ 1,
79
+ 1,
80
+ 0,
81
+ 1,
82
+ 1,
83
+ 1,
84
+ 0,
85
+ 1,
86
+ 1,
87
+ 1,
88
+ 0,
89
+ 1,
90
+ 1,
91
+ 1,
92
+ 0
93
+ ],
94
+ "num_attention_heads": 16,
95
+ "num_hidden_layers": 36,
96
+ "num_key_value_heads": 4,
97
+ "pad_token_id": 128004,
98
+ "pretraining_tp": 1,
99
+ "rms_norm_eps": 1e-06,
100
+ "rope_scaling": null,
101
+ "rope_theta": 5000000.0,
102
+ "sliding_window": null,
103
+ "torch_dtype": "bfloat16",
104
+ "transformers_version": "4.53.3",
105
+ "use_cache": false,
106
+ "use_sliding_window": false,
107
+ "vocab_size": 128256
108
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "<|im_end|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ }
16
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b6a500b662a34eb3f0374db856ba4ad7de4c81040571d78dc0d357238930005
3
+ size 17208819
tokenizer_config.json ADDED
@@ -0,0 +1,2064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<think>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "128003": {
28
+ "content": "</think>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|im_start|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|im_end|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<tool_response>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "128014": {
116
+ "content": "</tool_response>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "128015": {
124
+ "content": "<tool_call>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "128016": {
132
+ "content": "</tool_call>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "128017": {
140
+ "content": "<code>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "128018": {
148
+ "content": "</code>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": null,
2053
+ "clean_up_tokenization_spaces": true,
2054
+ "eos_token": "<|im_end|>",
2055
+ "extra_special_tokens": {},
2056
+ "fast": false,
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
+ "model_max_length": 131072,
2062
+ "pad_token": "<|im_end|>",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
+ }