Add the special token '<|im_end|>' to the tokenizer so that generation stops when <|im_end|> is produced, instead of continuing indefinitely.
#12
by
zjyhf
- opened
- tokenizer.json +1 -1
tokenizer.json
CHANGED
|
@@ -95,7 +95,7 @@
|
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"id": 128010,
|
| 98 |
-
"content": "<|
|
| 99 |
"single_word": false,
|
| 100 |
"lstrip": false,
|
| 101 |
"rstrip": false,
|
|
|
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"id": 128010,
|
| 98 |
+
"content": "<|im_end|>",
|
| 99 |
"single_word": false,
|
| 100 |
"lstrip": false,
|
| 101 |
"rstrip": false,
|