Hplm
/

Efi committed on
Commit
575e645
·
verified ·
1 Parent(s): 08a4276

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +3 -14
  2. tokenizer_config.json +4 -3
special_tokens_map.json CHANGED
@@ -1,18 +1,7 @@
1
  {
2
- "bos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
  "unk_token": {
17
  "content": "<|endoftext|>",
18
  "lstrip": false,
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
 
 
 
 
 
 
 
 
 
 
 
5
  "unk_token": {
6
  "content": "<|endoftext|>",
7
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -34,10 +34,11 @@
34
  "special": true
35
  }
36
  },
37
- "bos_token": "<|endoftext|>",
38
  "clean_up_tokenization_spaces": false,
39
- "eos_token": "<|endoftext|>",
40
- "model_max_length": 1000000000000000019884624838656,
 
41
  "tokenizer_class": "GPT2Tokenizer",
42
  "unk_token": "<|endoftext|>"
43
  }
 
34
  "special": true
35
  }
36
  },
37
+ "bos_token": "<s>",
38
  "clean_up_tokenization_spaces": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 128,
41
+ "pad_token": "<pad>",
42
  "tokenizer_class": "GPT2Tokenizer",
43
  "unk_token": "<|endoftext|>"
44
  }