agentlans committed on
Commit
43e0c3d
·
verified ·
1 Parent(s): b98b1ee

Upload 3 files

Browse files
special_tokens_map.json CHANGED
@@ -1,20 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<|eot_id|>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<|eom_id|>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- }
17
- ],
18
  "bos_token": {
19
  "content": "<|begin_of_text|>",
20
  "lstrip": false,
@@ -23,11 +7,10 @@
23
  "single_word": false
24
  },
25
  "eos_token": {
26
- "content": "<|end_of_text|>",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
30
  "single_word": false
31
- },
32
- "pad_token": "<|end_of_text|>"
33
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|begin_of_text|>",
4
  "lstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|eot_id|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
+ }
 
16
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
- size 17209920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79e3e522635f3171300913bb421464a87de6222182a0570b9b2ccba2a964b2b4
3
+ size 9085657
tokenizer_config.json CHANGED
@@ -2049,21 +2049,14 @@
2049
  "special": true
2050
  }
2051
  },
2052
- "additional_special_tokens": [
2053
- "<|eot_id|>",
2054
- "<|eom_id|>"
2055
- ],
2056
  "bos_token": "<|begin_of_text|>",
2057
- "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' }}{% endif %}{% endfor %}",
2058
  "clean_up_tokenization_spaces": true,
2059
- "eos_token": "<|end_of_text|>",
2060
  "model_input_names": [
2061
  "input_ids",
2062
  "attention_mask"
2063
  ],
2064
  "model_max_length": 131072,
2065
- "pad_token": "<|end_of_text|>",
2066
- "padding_side": "left",
2067
- "split_special_tokens": false,
2068
  "tokenizer_class": "PreTrainedTokenizerFast"
2069
  }
 
2049
  "special": true
2050
  }
2051
  },
 
 
 
 
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
2054
  "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
 
 
 
2061
  "tokenizer_class": "PreTrainedTokenizerFast"
2062
  }