Commit
·
faa6ae2
1
Parent(s):
253c6db
add tokenizer
Browse files- tokenizer.json +62 -62
- tokenizer_config.json +1 -1
tokenizer.json
CHANGED
|
@@ -129,67 +129,67 @@
|
|
| 129 |
"y": 65,
|
| 130 |
"z": 66,
|
| 131 |
"##u": 67,
|
| 132 |
-
"##
|
| 133 |
-
"##
|
| 134 |
-
"##
|
| 135 |
-
"##
|
| 136 |
-
"##
|
| 137 |
-
"##
|
| 138 |
-
"##
|
| 139 |
-
"##
|
| 140 |
-
"##
|
| 141 |
-
"##
|
| 142 |
-
"##
|
| 143 |
-
"##
|
| 144 |
"##m": 80,
|
| 145 |
-
"##
|
| 146 |
-
"##
|
| 147 |
-
"##
|
| 148 |
-
"##
|
| 149 |
-
"##
|
| 150 |
-
"##
|
| 151 |
-
"##
|
| 152 |
-
"##
|
| 153 |
-
"##
|
| 154 |
-
"##
|
| 155 |
-
"##
|
| 156 |
-
"##
|
| 157 |
-
"##
|
| 158 |
-
"##
|
| 159 |
-
"##
|
| 160 |
-
"##
|
| 161 |
-
"##
|
| 162 |
-
"##
|
| 163 |
-
"##
|
| 164 |
-
"##
|
| 165 |
-
"##
|
| 166 |
-
"##
|
| 167 |
-
"##
|
| 168 |
-
"##
|
| 169 |
-
"##
|
| 170 |
-
"##
|
| 171 |
-
"##
|
| 172 |
-
"##
|
| 173 |
-
"##
|
| 174 |
-
"##
|
| 175 |
"##P": 111,
|
| 176 |
-
"##
|
| 177 |
-
"##
|
| 178 |
-
"##
|
| 179 |
-
"##
|
| 180 |
-
"##
|
| 181 |
-
"##
|
| 182 |
-
"##
|
| 183 |
-
"##
|
| 184 |
-
"##
|
| 185 |
-
"##
|
| 186 |
-
"##
|
| 187 |
-
"##
|
| 188 |
-
"##
|
| 189 |
-
"##
|
| 190 |
"##X": 126,
|
| 191 |
-
"##
|
| 192 |
-
"##
|
| 193 |
"##he": 129,
|
| 194 |
"the": 130,
|
| 195 |
"##on": 131,
|
|
@@ -5493,8 +5493,8 @@
|
|
| 5493 |
"colon": 5429,
|
| 5494 |
"depression": 5430,
|
| 5495 |
"##known": 5431,
|
| 5496 |
-
"##
|
| 5497 |
-
"##
|
| 5498 |
"##insula": 5434,
|
| 5499 |
"outdoor": 5435,
|
| 5500 |
"##unk": 5436,
|
|
@@ -9005,8 +9005,8 @@
|
|
| 9005 |
"IoT": 8941,
|
| 9006 |
"tac": 8942,
|
| 9007 |
"staying": 8943,
|
| 9008 |
-
"##
|
| 9009 |
-
"##
|
| 9010 |
"heated": 8946,
|
| 9011 |
"none": 8947,
|
| 9012 |
"temperatures": 8948,
|
|
|
|
| 129 |
"y": 65,
|
| 130 |
"z": 66,
|
| 131 |
"##u": 67,
|
| 132 |
+
"##k": 68,
|
| 133 |
+
"##w": 69,
|
| 134 |
+
"##a": 70,
|
| 135 |
+
"##n": 71,
|
| 136 |
+
"##g": 72,
|
| 137 |
+
"##s": 73,
|
| 138 |
+
"##e": 74,
|
| 139 |
+
"##b": 75,
|
| 140 |
+
"##o": 76,
|
| 141 |
+
"##r": 77,
|
| 142 |
+
"##t": 78,
|
| 143 |
+
"##i": 79,
|
| 144 |
"##m": 80,
|
| 145 |
+
"##d": 81,
|
| 146 |
+
"##x": 82,
|
| 147 |
+
"##O": 83,
|
| 148 |
+
"##l": 84,
|
| 149 |
+
"##c": 85,
|
| 150 |
+
"##v": 86,
|
| 151 |
+
"##A": 87,
|
| 152 |
+
"##C": 88,
|
| 153 |
+
"##2": 89,
|
| 154 |
+
"##R": 90,
|
| 155 |
+
"##1": 91,
|
| 156 |
+
"##h": 92,
|
| 157 |
+
"##j": 93,
|
| 158 |
+
"##p": 94,
|
| 159 |
+
"##S": 95,
|
| 160 |
+
"##y": 96,
|
| 161 |
+
"##7": 97,
|
| 162 |
+
"##6": 98,
|
| 163 |
+
"##5": 99,
|
| 164 |
+
"##0": 100,
|
| 165 |
+
"##9": 101,
|
| 166 |
+
"##4": 102,
|
| 167 |
+
"##3": 103,
|
| 168 |
+
"##E": 104,
|
| 169 |
+
"##L": 105,
|
| 170 |
+
"##M": 106,
|
| 171 |
+
"##D": 107,
|
| 172 |
+
"##U": 108,
|
| 173 |
+
"##B": 109,
|
| 174 |
+
"##f": 110,
|
| 175 |
"##P": 111,
|
| 176 |
+
"##K": 112,
|
| 177 |
+
"##G": 113,
|
| 178 |
+
"##T": 114,
|
| 179 |
+
"##Z": 115,
|
| 180 |
+
"##8": 116,
|
| 181 |
+
"##q": 117,
|
| 182 |
+
"##Y": 118,
|
| 183 |
+
"##W": 119,
|
| 184 |
+
"##V": 120,
|
| 185 |
+
"##I": 121,
|
| 186 |
+
"##F": 122,
|
| 187 |
+
"##H": 123,
|
| 188 |
+
"##z": 124,
|
| 189 |
+
"##N": 125,
|
| 190 |
"##X": 126,
|
| 191 |
+
"##J": 127,
|
| 192 |
+
"##Q": 128,
|
| 193 |
"##he": 129,
|
| 194 |
"the": 130,
|
| 195 |
"##on": 131,
|
|
|
|
| 5493 |
"colon": 5429,
|
| 5494 |
"depression": 5430,
|
| 5495 |
"##known": 5431,
|
| 5496 |
+
"##know": 5432,
|
| 5497 |
+
"##men": 5433,
|
| 5498 |
"##insula": 5434,
|
| 5499 |
"outdoor": 5435,
|
| 5500 |
"##unk": 5436,
|
|
|
|
| 9005 |
"IoT": 8941,
|
| 9006 |
"tac": 8942,
|
| 9007 |
"staying": 8943,
|
| 9008 |
+
"##26": 8944,
|
| 9009 |
+
"##zens": 8945,
|
| 9010 |
"heated": 8946,
|
| 9011 |
"none": 8947,
|
| 9012 |
"temperatures": 8948,
|
tokenizer_config.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"
|
|
|
|
| 1 |
+
{"tokenizer_class": "PreTrainedTokenizerFast"}
|