murat commited on
Commit
207605d
·
verified ·
1 Parent(s): a947a9a

Upload vocab.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. vocab.json +340 -70
vocab.json CHANGED
@@ -4,83 +4,353 @@
4
  "<UNK>": 1,
5
  "<BOS>": 2,
6
  "<EOS>": 3,
7
- "«": 4,
8
- "б": 5,
9
- "у": 6,
10
- "л": 7,
11
  " ": 8,
12
- "и": 9,
13
- "ш": 10,
14
- "ч": 11,
15
- "а": 12,
16
- "р": 13,
17
- "д": 14,
18
- "н": 15,
19
- "ж": 16,
20
- "о": 17,
21
- "г": 18,
22
- "к": 19,
23
- "е": 20,
24
- "ң": 21,
25
- "э": 22,
26
- "ө": 23,
27
- "т": 24,
28
- "ү": 25,
29
- ",": 26,
30
- "м": 27,
31
- "ы": 28,
32
- "»": 29,
33
- ".": 30,
34
- "я": 31,
35
- "с": 32,
36
- "з": 33,
37
- "ю": 34,
38
- "в": 35,
39
- "й": 36,
40
- "п": 37,
41
- "-": 38,
42
- "ф": 39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  },
44
  "idx_to_char": {
45
  "0": "<PAD>",
46
  "1": "<UNK>",
47
  "2": "<BOS>",
48
  "3": "<EOS>",
49
- "4": "«",
50
- "5": "б",
51
- "6": "у",
52
- "7": "л",
53
  "8": " ",
54
- "9": "и",
55
- "10": "ш",
56
- "11": "ч",
57
- "12": "а",
58
- "13": "р",
59
- "14": "д",
60
- "15": "н",
61
- "16": "ж",
62
- "17": "о",
63
- "18": "г",
64
- "19": "к",
65
- "20": "е",
66
- "21": "ң",
67
- "22": "э",
68
- "23": "ө",
69
- "24": "т",
70
- "25": "ү",
71
- "26": ",",
72
- "27": "м",
73
- "28": "ы",
74
- "29": "»",
75
- "30": ".",
76
- "31": "я",
77
- "32": "с",
78
- "33": "з",
79
- "34": "ю",
80
- "35": "в",
81
- "36": "й",
82
- "37": "п",
83
- "38": "-",
84
- "39": "ф"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  }
 
4
  "<UNK>": 1,
5
  "<BOS>": 2,
6
  "<EOS>": 3,
7
+ "д": 4,
8
+ "а": 5,
9
+ "г": 6,
10
+ "ы": 7,
11
  " ": 8,
12
+ "б": 9,
13
+ "и": 10,
14
+ "р": 11,
15
+ "ж": 12,
16
+ "о": 13,
17
+ "л": 14,
18
+ "у": 15,
19
+ "й": 16,
20
+ "т": 17,
21
+ "м": 18,
22
+ ",": 19,
23
+ "к": 20,
24
+ "с": 21,
25
+ "н": 22,
26
+ "ч": 23,
27
+ "ө": 24,
28
+ "з": 25,
29
+ "ү": 26,
30
+ "э": 27,
31
+ "е": 28,
32
+ ".": 29,
33
+ "я": 30,
34
+ "п": 31,
35
+ "ш": 32,
36
+ "4": 33,
37
+ "0": 34,
38
+ "ц": 35,
39
+ "ң": 36,
40
+ "«": 37,
41
+ "»": 38,
42
+ "!": 39,
43
+ "в": 40,
44
+ "2": 41,
45
+ "-": 42,
46
+ "х": 43,
47
+ "ф": 44,
48
+ "k": 45,
49
+ "l": 46,
50
+ "o": 47,
51
+ "p": 48,
52
+ "g": 49,
53
+ "ю": 50,
54
+ "—": 51,
55
+ "7": 52,
56
+ "5": 53,
57
+ "1": 54,
58
+ "ь": 55,
59
+ "6": 56,
60
+ "i": 57,
61
+ "s": 58,
62
+ "a": 59,
63
+ " ": 60,
64
+ "t": 61,
65
+ "e": 62,
66
+ "m": 63,
67
+ "r": 64,
68
+ "v": 65,
69
+ "’": 66,
70
+ "9": 67,
71
+ "%": 68,
72
+ "3": 69,
73
+ "?": 70,
74
+ ":": 71,
75
+ "]": 72,
76
+ "[": 73,
77
+ "…": 74,
78
+ "8": 75,
79
+ "x": 76,
80
+ "(": 77,
81
+ ")": 78,
82
+ "№": 79,
83
+ ";": 80,
84
+ "–": 81,
85
+ "“": 82,
86
+ "”": 83,
87
+ "u": 84,
88
+ "​": 85,
89
+ "c": 86,
90
+ "•": 87,
91
+ "b": 88,
92
+ "f": 89,
93
+ "w": 90,
94
+ "ё": 91,
95
+ "n": 92,
96
+ "y": 93,
97
+ "d": 94,
98
+ "h": 95,
99
+ "―": 96,
100
+ "/": 97,
101
+ "*": 98,
102
+ "$": 99,
103
+ "ъ": 100,
104
+ "\"": 101,
105
+ "{": 102,
106
+ "}": 103,
107
+ "z": 104,
108
+ "щ": 105,
109
+ "q": 106,
110
+ "'": 107,
111
+ "@": 108,
112
+ "ɵ": 109,
113
+ "&": 110,
114
+ "ӊ": 111,
115
+ "j": 112,
116
+ "+": 113,
117
+ "ç": 114,
118
+ "ı": 115,
119
+ "ö": 116,
120
+ "ü": 117,
121
+ "ѳ": 118,
122
+ "─": 119,
123
+ "²": 120,
124
+ "‌": 121,
125
+ "ş": 122,
126
+ "‘": 123,
127
+ "‑": 124,
128
+ "_": 125,
129
+ "·": 126,
130
+ "#": 127,
131
+ "⅛": 128,
132
+ " ": 129,
133
+ "=": 130,
134
+ "ꞌ": 131,
135
+ "⅔": 132,
136
+ "⁠": 133,
137
+ "❤": 134,
138
+ "️": 135,
139
+ "є": 136,
140
+ "″": 137,
141
+ "ʙ": 138,
142
+ " ": 139,
143
+ "🙏": 140,
144
+ "̆": 141,
145
+ "☮": 142,
146
+ "¶": 143,
147
+ "қ": 144,
148
+ "ұ": 145,
149
+ "😊": 146,
150
+ "🙂": 147,
151
+ "‣": 148,
152
+ "і": 149,
153
+ "⅓": 150,
154
+ "\\": 151,
155
+ "≠": 152,
156
+ "➖": 153,
157
+ "é": 154,
158
+ "‒": 155,
159
+ "ä": 156,
160
+ "à": 157,
161
+ "⠀": 158,
162
+ "🇰": 159,
163
+ "🇬": 160,
164
+ "θ": 161,
165
+ "ʃ": 162,
166
+ "č": 163,
167
+ "á": 164,
168
+ "🎇": 165,
169
+ "🌅": 166,
170
+ "×": 167,
171
+ "‚": 168,
172
+ "°": 169,
173
+ "ě": 170,
174
+ "`": 171,
175
+ "ə": 172,
176
+ "ƣ": 173,
177
+ "„": 174
178
  },
179
  "idx_to_char": {
180
  "0": "<PAD>",
181
  "1": "<UNK>",
182
  "2": "<BOS>",
183
  "3": "<EOS>",
184
+ "4": "д",
185
+ "5": "а",
186
+ "6": "г",
187
+ "7": "ы",
188
  "8": " ",
189
+ "9": "б",
190
+ "10": "и",
191
+ "11": "р",
192
+ "12": "ж",
193
+ "13": "о",
194
+ "14": "л",
195
+ "15": "у",
196
+ "16": "й",
197
+ "17": "т",
198
+ "18": "м",
199
+ "19": ",",
200
+ "20": "к",
201
+ "21": "с",
202
+ "22": "н",
203
+ "23": "ч",
204
+ "24": "ө",
205
+ "25": "з",
206
+ "26": "ү",
207
+ "27": "э",
208
+ "28": "е",
209
+ "29": ".",
210
+ "30": "я",
211
+ "31": "п",
212
+ "32": "ш",
213
+ "33": "4",
214
+ "34": "0",
215
+ "35": "ц",
216
+ "36": "ң",
217
+ "37": "«",
218
+ "38": "»",
219
+ "39": "!",
220
+ "40": "в",
221
+ "41": "2",
222
+ "42": "-",
223
+ "43": "х",
224
+ "44": "ф",
225
+ "45": "k",
226
+ "46": "l",
227
+ "47": "o",
228
+ "48": "p",
229
+ "49": "g",
230
+ "50": "ю",
231
+ "51": "—",
232
+ "52": "7",
233
+ "53": "5",
234
+ "54": "1",
235
+ "55": "ь",
236
+ "56": "6",
237
+ "57": "i",
238
+ "58": "s",
239
+ "59": "a",
240
+ "60": " ",
241
+ "61": "t",
242
+ "62": "e",
243
+ "63": "m",
244
+ "64": "r",
245
+ "65": "v",
246
+ "66": "’",
247
+ "67": "9",
248
+ "68": "%",
249
+ "69": "3",
250
+ "70": "?",
251
+ "71": ":",
252
+ "72": "]",
253
+ "73": "[",
254
+ "74": "…",
255
+ "75": "8",
256
+ "76": "x",
257
+ "77": "(",
258
+ "78": ")",
259
+ "79": "№",
260
+ "80": ";",
261
+ "81": "–",
262
+ "82": "“",
263
+ "83": "”",
264
+ "84": "u",
265
+ "85": "​",
266
+ "86": "c",
267
+ "87": "•",
268
+ "88": "b",
269
+ "89": "f",
270
+ "90": "w",
271
+ "91": "ё",
272
+ "92": "n",
273
+ "93": "y",
274
+ "94": "d",
275
+ "95": "h",
276
+ "96": "―",
277
+ "97": "/",
278
+ "98": "*",
279
+ "99": "$",
280
+ "100": "ъ",
281
+ "101": "\"",
282
+ "102": "{",
283
+ "103": "}",
284
+ "104": "z",
285
+ "105": "щ",
286
+ "106": "q",
287
+ "107": "'",
288
+ "108": "@",
289
+ "109": "ɵ",
290
+ "110": "&",
291
+ "111": "ӊ",
292
+ "112": "j",
293
+ "113": "+",
294
+ "114": "ç",
295
+ "115": "ı",
296
+ "116": "ö",
297
+ "117": "ü",
298
+ "118": "ѳ",
299
+ "119": "─",
300
+ "120": "²",
301
+ "121": "‌",
302
+ "122": "ş",
303
+ "123": "‘",
304
+ "124": "‑",
305
+ "125": "_",
306
+ "126": "·",
307
+ "127": "#",
308
+ "128": "⅛",
309
+ "129": " ",
310
+ "130": "=",
311
+ "131": "ꞌ",
312
+ "132": "⅔",
313
+ "133": "⁠",
314
+ "134": "❤",
315
+ "135": "️",
316
+ "136": "є",
317
+ "137": "″",
318
+ "138": "ʙ",
319
+ "139": " ",
320
+ "140": "🙏",
321
+ "141": "̆",
322
+ "142": "☮",
323
+ "143": "¶",
324
+ "144": "қ",
325
+ "145": "ұ",
326
+ "146": "😊",
327
+ "147": "🙂",
328
+ "148": "‣",
329
+ "149": "і",
330
+ "150": "⅓",
331
+ "151": "\\",
332
+ "152": "≠",
333
+ "153": "➖",
334
+ "154": "é",
335
+ "155": "‒",
336
+ "156": "ä",
337
+ "157": "à",
338
+ "158": "⠀",
339
+ "159": "🇰",
340
+ "160": "🇬",
341
+ "161": "θ",
342
+ "162": "ʃ",
343
+ "163": "č",
344
+ "164": "á",
345
+ "165": "🎇",
346
+ "166": "🌅",
347
+ "167": "×",
348
+ "168": "‚",
349
+ "169": "°",
350
+ "170": "ě",
351
+ "171": "`",
352
+ "172": "ə",
353
+ "173": "ƣ",
354
+ "174": "„"
355
  }
356
  }