Badr Abdullah
Upload tokenizer
ce6f745 verified
raw
history blame
569 Bytes
{
"(": 1,
")": 2,
"[PAD]": 47,
"[UNK]": 46,
"_": 3,
"`": 4,
"a": 5,
"e": 6,
"i": 7,
"o": 8,
"x": 9,
"|": 0,
"»": 10,
"а": 11,
"б": 12,
"в": 13,
"г": 14,
"д": 15,
"е": 16,
"ж": 17,
"з": 18,
"и": 19,
"й": 20,
"к": 21,
"л": 22,
"м": 23,
"н": 24,
"о": 25,
"п": 26,
"р": 27,
"с": 28,
"т": 29,
"у": 30,
"ф": 31,
"х": 32,
"ц": 33,
"ч": 34,
"ш": 35,
"щ": 36,
"ъ": 37,
"ь": 38,
"ю": 39,
"я": 40,
"ѝ": 41,
"–": 42,
"—": 43,
"„": 44,
"…": 45
}