MBMMurad's picture
add tokenizer
368a8e5
{
"!": 1,
"'": 2,
",": 3,
"-": 4,
".": 5,
":": 6,
";": 7,
"=": 8,
"?": 9,
"[PAD]": 84,
"[UNK]": 83,
"|": 0,
"।": 10,
"ঁ": 11,
"ং": 12,
"ঃ": 13,
"অ": 14,
"আ": 15,
"ই": 16,
"ঈ": 17,
"উ": 18,
"ঊ": 19,
"ঋ": 20,
"এ": 21,
"ঐ": 22,
"ও": 23,
"ঔ": 24,
"ক": 25,
"খ": 26,
"গ": 27,
"ঘ": 28,
"ঙ": 29,
"চ": 30,
"ছ": 31,
"জ": 32,
"ঝ": 33,
"ঞ": 34,
"ট": 35,
"ঠ": 36,
"ড": 37,
"ঢ": 38,
"ণ": 39,
"ত": 40,
"থ": 41,
"দ": 42,
"ধ": 43,
"ন": 44,
"প": 45,
"ফ": 46,
"ব": 47,
"ভ": 48,
"ম": 49,
"য": 50,
"র": 51,
"ল": 52,
"শ": 53,
"ষ": 54,
"স": 55,
"হ": 56,
"া": 57,
"ি": 58,
"ী": 59,
"ু": 60,
"ূ": 61,
"ৃ": 62,
"ে": 63,
"ৈ": 64,
"ো": 65,
"ৌ": 66,
"্": 67,
"ৎ": 68,
"ড়": 69,
"ঢ়": 70,
"য়": 71,
"০": 72,
"১": 73,
"২": 74,
"৩": 75,
"৪": 76,
"৫": 77,
"৬": 78,
"৭": 79,
"৮": 80,
"৯": 81,
"‍": 82
}