gpt2_50t_1M_256d_8l / tokenizer.json

Training in progress, step 1758

0b0915f verified 2 months ago

8.45 kB

	{
	"version": "1.0",
	"truncation": null,
	"padding": null,
	"added_tokens": [
	{
	"id": 0,
	"content": "<unk>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 1,
	"content": "<pad>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 2,
	"content": "<mask>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 3,
	"content": "<bos>",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	}
	],
	"normalizer": null,
	"pre_tokenizer": {
	"type": "WhitespaceSplit"
	},
	"post_processor": {
	"type": "TemplateProcessing",
	"single": [
	{
	"SpecialToken": {
	"id": "<bos>",
	"type_id": 0
	}
	},
	{
	"Sequence": {
	"id": "A",
	"type_id": 0
	}
	}
	],
	"pair": [
	{
	"Sequence": {
	"id": "A",
	"type_id": 0
	}
	},
	{
	"Sequence": {
	"id": "B",
	"type_id": 1
	}
	}
	],
	"special_tokens": {
	"<bos>": {
	"id": "<bos>",
	"ids": [
	3
	],
	"tokens": [
	"<bos>"
	]
	}
	}
	},
	"decoder": null,
	"model": {
	"type": "WordLevel",
	"vocab": {
	"<unk>": 0,
	"<pad>": 1,
	"<mask>": 2,
	"<bos>": 3,
	".": 4,
	",": 5,
	"the": 6,
	"was": 7,
	"and": 8,
	"to": 9,
	"a": 10,
	"Noah": 11,
	"he": 12,
	"his": 13,
	"of": 14,
	"n<apostrophe>t": 15,
	"back": 16,
	"<apostrophe><apostrophe>": 17,
	"The": 18,
	"in": 19,
	"it": 20,
	"boy": 21,
	"had": 22,
	"``": 23,
	"<apostrophe>s": 24,
	"up": 25,
	"you": 26,
	"Bible": 27,
	"one": 28,
	"that": 29,
	"she": 30,
	"I": 31,
	"She": 32,
	"there": 33,
	"on": 34,
	"be": 35,
	"from": 36,
	"friend": 37,
	"get": 38,
	"over": 39,
	"would": 40,
	"really": 41,
	"about": 42,
	"And": 43,
	"He": 44,
	"off": 45,
	"this": 46,
	"here": 47,
	"never": 48,
	"just": 49,
	"good": 50,
	"with": 51,
	"by": 52,
	"ditch": 53,
	":": 54,
	"happened": 55,
	"left": 56,
	"came": 57,
	"But": 58,
	"but": 59,
	"It": 60,
	"they": 61,
	"him": 62,
	"did": 63,
	"were": 64,
	"<apostrophe>ll": 65,
	"her": 66,
	"Quechua": 67,
	"now": 68,
	"then": 69,
	"dead": 70,
	"like": 71,
	"higher": 72,
	"guy": 73,
	"for": 74,
	"chin": 75,
	"into": 76,
	"best": 77,
	"so": 78,
	"them": 79,
	"out": 80,
	"leave": 81,
	"looking": 82,
	"Cochabamba": 83,
	"eyes": 84,
	"grass": 85,
	"Oh": 86,
	"?": 87,
	"asked": 88,
	"saw": 89,
	"knew": 90,
	"slid": 91,
	"began": 92,
	"quite": 93,
	"fell": 94,
	"looked": 95,
	"used": 96,
	"forced": 97,
	"intended": 98,
	"wore": 99,
	"is": 100,
	"warmed": 101,
	"heard": 102,
	"pushed": 103,
	"cared": 104,
	"Plus": 105,
	"meant": 106,
	"They": 107,
	"no": 108,
	"gold": 109,
	"we": 110,
	"breathe": 111,
	"make": 112,
	"school": 113,
	"give": 114,
	"teach": 115,
	"speak": 116,
	"march": 117,
	"try": 118,
	"help": 119,
	"justice": 120,
	"seat": 121,
	"could": 122,
	"surfer": 123,
	"stop": 124,
	"stand": 125,
	"sleeve": 126,
	"position": 127,
	"unmistakably": 128,
	"care": 129,
	"badly": 130,
	"eye": 131,
	"should": 132,
	"find": 133,
	"freezing": 134,
	"not": 135,
	"downright": 136,
	"crumpled": 137,
	"himself": 138,
	"garden": 139,
	"dude": 140,
	"teaching": 141,
	"can": 142,
	"Another": 143,
	"have": 144,
	"class": 145,
	"together": 146,
	"face-up": 147,
	"shaking": 148,
	"soon": 149,
	"waking": 150,
	"none": 151,
	"everything": 152,
	"blackened": 153,
	"sprawled": 154,
	"been": 155,
	"moved": 156,
	"muscle": 157,
	"mattered": 158,
	"might": 159,
	"aside": 160,
	"going": 161,
	"hardly": 162,
	"glinting": 163,
	"Today": 164,
	"finally": 165,
	"anymore": 166,
	"gone": 167,
	"Tuesday": 168,
	"Did": 169,
	"Word": 170,
	"told": 171,
	"climbing": 172,
	"mutter": 173,
	"stepping": 174,
	"dripping": 175,
	"Wiping": 176,
	"flung": 177,
	"town": 178,
	"know": 179,
	"cap": 180,
	"Hearst": 181,
	"Ohio": 182,
	"are": 183,
	"stones": 184,
	"native": 185,
	"four": 186,
	"Bolivian": 187,
	"potatoes": 188,
	"turquoise": 189,
	"lot": 190,
	"hat": 191,
	"eight": 192,
	"right": 193,
	"lucky": 194,
	"moonlight": 195,
	"country": 196,
	"street": 197,
	"classes": 198,
	"calculations": 199,
	"knees": 200,
	"luggage": 201,
	"aura": 202,
	"world": 203,
	"His": 204,
	"cows": 205,
	"road": 206,
	"hypocrite": 207,
	"green": 208,
	"By": 209,
	"clouds": 210,
	"freaky": 211,
	"white": 212,
	"onto": 213,
	"fingers": 214,
	"bloodied": 215,
	"months": 216,
	"tears": 217,
	"finger": 218,
	"fat": 219,
	"full": 220,
	"sure": 221,
	"surfers": 222,
	"Her": 223,
	"women": 224,
	"things": 225,
	"At": 226,
	"hours": 227,
	"Because": 228,
	"under": 229,
	"first": 230,
	"vibrant": 231,
	"nice": 232,
	"embroidered": 233,
	"emerald": 234,
	"sharp": 235,
	"muddy": 236,
	"rough": 237,
	"stuff": 238,
	"able": 239,
	"low": 240,
	"swollen": 241,
	"content": 242,
	"knit": 243,
	"long": 244,
	"city": 245,
	"hike": 246,
	"literacy": 247,
	"face": 248,
	"cross": 249,
	"big": 250,
	"glance": 251,
	"nap": 252,
	"moan": 253,
	"uniform": 254,
	"perch": 255,
	"breath": 256,
	"ride": 257,
	"virtue": 258,
	"towards": 259,
	"because": 260,
	"Despite": 261,
	"since": 262,
	"as": 263,
	"at": 264,
	"what": 265,
	"where": 266,
	"how": 267,
	"when": 268,
	"grimaced": 269,
	"narrowed": 270,
	"rose": 271,
	"gave": 272,
	"thought": 273,
	"tried": 274,
	"crisped": 275,
	"anybody": 276,
	"tossed": 277,
	"skin": 278,
	"hazel": 279,
	"house": 280,
	"addition": 281,
	"path": 282,
	"forward": 283,
	"dirt": 284,
	"too": 285,
	"Not": 286,
	"seriously": 287,
	"Jeep": 288,
	"again": 289,
	"sun": 290,
	"conference": 291,
	"even": 292,
	"place": 293,
	"child": 294,
	"Seriously": 295,
	"baby": 296,
	"looks": 297,
	"This": 298,
	"tree": 299,
	"lip": 300,
	"sky": 301,
	"lunch": 302,
	"all": 303,
	"light": 304,
	"stretch": 305,
	"staying": 306,
	"rising": 307,
	"take": 308,
	"worried": 309,
	"carrying": 310,
	"Sounding": 311,
	"studied": 312,
	"being": 313,
	"murky": 314,
	"quivered": 315,
	"beckoning": 316,
	"red": 317,
	"guess": 318,
	"than": 319,
	"Twilight": 320,
	"skeletal": 321,
	"Spain": 322,
	"Spanish": 323,
	"strong": 324,
	"blond": 325,
	"Zone": 326,
	"fine": 327,
	"own": 328,
	"In": 329,
	"tall": 330,
	"single": 331,
	"silky": 332,
	"parents": 333,
	"our": 334,
	"thick": 335,
	"shadows": 336,
	"gravel": 337,
	"remains": 338,
	"darker": 339,
	"lower": 340,
	"If": 341,
	"outside": 342,
	"<apostrophe>": 343,
	"who": 344
	},
	"unk_token": "<unk>"
	}
	}