Reihaneh commited on
Commit
e0970ab
·
verified ·
1 Parent(s): 0180946

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +77 -77
vocab.json CHANGED
@@ -1,84 +1,84 @@
1
  {
2
  "'": 77,
3
- "L": 35,
4
- "M": 4,
5
  "ML": 80,
6
  "TA": 81,
7
- "[": 23,
8
  "[PAD]": 79,
9
  "[UNK]": 78,
10
- "]": 22,
11
- "|": 33,
12
- "ം": 50,
13
- "ഃ": 54,
14
- "അ": 30,
15
- "ആ": 57,
16
- "ഇ": 43,
17
- "ഈ": 14,
18
- "ഉ": 10,
19
- "ഊ": 64,
20
- "എ": 67,
21
- "ഏ": 19,
22
- "ഐ": 48,
23
- "ഒ": 53,
24
- "ഓ": 58,
25
- "ക": 66,
26
- "ഖ": 39,
27
- "ഗ": 29,
28
- "ഘ": 72,
29
- "ങ": 42,
30
- "ച": 32,
31
- "ഛ": 9,
32
- "ജ": 74,
33
- "ഞ": 61,
34
- "ട": 2,
35
- "ഠ": 7,
36
- "ഡ": 8,
37
- "ഢ": 44,
38
- "ണ": 68,
39
- "ത": 65,
40
- "ഥ": 52,
41
- "ദ": 24,
42
- "ധ": 12,
43
- "ന": 20,
44
- "പ": 47,
45
- "ഫ": 40,
46
- "ബ": 56,
47
- "ഭ": 37,
48
- "മ": 59,
49
- "യ": 3,
50
- "ര": 17,
51
- "റ": 69,
52
- "ല": 46,
53
- "ള": 13,
54
- "ഴ": 16,
55
- "വ": 55,
56
- "ശ": 45,
57
- "ഷ": 5,
58
- "സ": 15,
59
- "ഹ": 27,
60
- "ാ": 49,
61
- "ി": 60,
62
- "ീ": 18,
63
- "ു": 11,
64
- "ൂ": 1,
65
- "ൃ": 6,
66
- "െ": 63,
67
- "േ": 51,
68
- "ൈ": 75,
69
- "ൊ": 62,
70
- "ോ": 21,
71
- "ൌ": 0,
72
- "്": 38,
73
- "ൗ": 73,
74
- "ൺ": 25,
75
- "ൻ": 26,
76
- "ർ": 34,
77
- "ൽ": 71,
78
- "ൾ": 31,
79
- "ൿ": 70,
80
- "‘": 28,
81
- "’": 36,
82
- "“": 41,
83
- "”": 76
84
  }
 
1
  {
2
  "'": 77,
3
+ "L": 3,
4
+ "M": 15,
5
  "ML": 80,
6
  "TA": 81,
7
+ "[": 72,
8
  "[PAD]": 79,
9
  "[UNK]": 78,
10
+ "]": 55,
11
+ "|": 44,
12
+ "ം": 20,
13
+ "ഃ": 25,
14
+ "അ": 34,
15
+ "ആ": 63,
16
+ "ഇ": 30,
17
+ "ഈ": 35,
18
+ "ഉ": 21,
19
+ "ഊ": 27,
20
+ "എ": 51,
21
+ "ഏ": 37,
22
+ "ഐ": 58,
23
+ "ഒ": 46,
24
+ "ഓ": 62,
25
+ "ക": 65,
26
+ "ഖ": 69,
27
+ "ഗ": 0,
28
+ "ഘ": 17,
29
+ "ങ": 57,
30
+ "ച": 68,
31
+ "ഛ": 71,
32
+ "ജ": 76,
33
+ "ഞ": 47,
34
+ "ട": 14,
35
+ "ഠ": 74,
36
+ "ഡ": 39,
37
+ "ഢ": 1,
38
+ "ണ": 10,
39
+ "ത": 29,
40
+ "ഥ": 67,
41
+ "ദ": 18,
42
+ "ധ": 9,
43
+ "ന": 73,
44
+ "പ": 66,
45
+ "ഫ": 38,
46
+ "ബ": 61,
47
+ "ഭ": 45,
48
+ "മ": 19,
49
+ "യ": 16,
50
+ "ര": 7,
51
+ "റ": 59,
52
+ "ല": 28,
53
+ "ള": 40,
54
+ "ഴ": 2,
55
+ "വ": 54,
56
+ "ശ": 64,
57
+ "ഷ": 26,
58
+ "സ": 56,
59
+ "ഹ": 42,
60
+ "ാ": 5,
61
+ "ി": 49,
62
+ "ീ": 24,
63
+ "ു": 75,
64
+ "ൂ": 23,
65
+ "ൃ": 41,
66
+ "െ": 31,
67
+ "േ": 50,
68
+ "ൈ": 53,
69
+ "ൊ": 60,
70
+ "ോ": 48,
71
+ "ൌ": 70,
72
+ "്": 12,
73
+ "ൗ": 43,
74
+ "ൺ": 13,
75
+ "ൻ": 36,
76
+ "ർ": 6,
77
+ "ൽ": 32,
78
+ "ൾ": 52,
79
+ "ൿ": 11,
80
+ "‘": 8,
81
+ "’": 4,
82
+ "“": 33,
83
+ "”": 22
84
  }