w2v-bert-2_7_datasets / vocab.json
Bajiyo's picture
Upload tokenizer
a053dcb verified
raw
history blame
1.23 kB
{
"$": 1,
"&": 2,
"+": 3,
"/": 4,
"0": 5,
"1": 6,
"2": 7,
"3": 8,
"4": 9,
"5": 10,
"6": 11,
"7": 12,
"8": 13,
"9": 14,
"[": 15,
"[PAD]": 97,
"[UNK]": 96,
"]": 16,
"_": 17,
"|": 0,
"~": 18,
"°": 19,
"ം": 20,
"ഃ": 21,
"അ": 22,
"ആ": 23,
"ഇ": 24,
"ഈ": 25,
"ഉ": 26,
"ഊ": 27,
"ഋ": 28,
"എ": 29,
"ഏ": 30,
"ഐ": 31,
"ഒ": 32,
"ഓ": 33,
"ഔ": 34,
"ക": 35,
"ഖ": 36,
"ഗ": 37,
"ഘ": 38,
"ങ": 39,
"ച": 40,
"ഛ": 41,
"ജ": 42,
"ഝ": 43,
"ഞ": 44,
"ട": 45,
"ഠ": 46,
"ഡ": 47,
"ഢ": 48,
"ണ": 49,
"ത": 50,
"ഥ": 51,
"ദ": 52,
"ധ": 53,
"ന": 54,
"പ": 55,
"ഫ": 56,
"ബ": 57,
"ഭ": 58,
"മ": 59,
"യ": 60,
"ര": 61,
"റ": 62,
"ല": 63,
"ള": 64,
"ഴ": 65,
"വ": 66,
"ശ": 67,
"ഷ": 68,
"സ": 69,
"ഹ": 70,
"ാ": 71,
"ി": 72,
"ീ": 73,
"ു": 74,
"ൂ": 75,
"ൃ": 76,
"െ": 77,
"േ": 78,
"ൈ": 79,
"ൊ": 80,
"ോ": 81,
"ൌ": 82,
"്": 83,
"ൗ": 84,
"൱": 85,
"ൺ": 86,
"ൻ": 87,
"ർ": 88,
"ൽ": 89,
"ൾ": 90,
"ൿ": 91,
"‌": 92,
"‍": 93,
"–": 94,
"’": 95
}