Bibek1129 committed on
Commit
f79a444
1 Parent(s): fa5d586

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +129 -129
  2. vocab.json +101 -101
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "1": {
4
- "content": "४८",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
@@ -9,7 +9,7 @@
9
  "special": false
10
  },
11
  "2": {
12
- "content": "८८",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
@@ -17,7 +17,7 @@
17
  "special": false
18
  },
19
  "3": {
20
- "content": "२९",
21
  "lstrip": true,
22
  "normalized": false,
23
  "rstrip": true,
@@ -25,7 +25,7 @@
25
  "special": false
26
  },
27
  "4": {
28
- "content": "९५",
29
  "lstrip": true,
30
  "normalized": false,
31
  "rstrip": true,
@@ -33,7 +33,7 @@
33
  "special": false
34
  },
35
  "5": {
36
- "content": "२३",
37
  "lstrip": true,
38
  "normalized": false,
39
  "rstrip": true,
@@ -41,7 +41,7 @@
41
  "special": false
42
  },
43
  "6": {
44
- "content": "६१",
45
  "lstrip": true,
46
  "normalized": false,
47
  "rstrip": true,
@@ -49,7 +49,7 @@
49
  "special": false
50
  },
51
  "7": {
52
- "content": "४७",
53
  "lstrip": true,
54
  "normalized": false,
55
  "rstrip": true,
@@ -57,7 +57,7 @@
57
  "special": false
58
  },
59
  "8": {
60
- "content": "२५",
61
  "lstrip": true,
62
  "normalized": false,
63
  "rstrip": true,
@@ -65,7 +65,7 @@
65
  "special": false
66
  },
67
  "9": {
68
- "content": "६८",
69
  "lstrip": true,
70
  "normalized": false,
71
  "rstrip": true,
@@ -73,14 +73,14 @@
73
  "special": false
74
  },
75
  "10": {
76
- "content": "८७",
77
  "lstrip": true,
78
  "normalized": false,
79
  "rstrip": true,
80
  "single_word": false,
81
  "special": false
82
  },
83
- "12": {
84
  "content": "४५",
85
  "lstrip": true,
86
  "normalized": false,
@@ -88,8 +88,16 @@
88
  "single_word": false,
89
  "special": false
90
  },
 
 
 
 
 
 
 
 
91
  "13": {
92
- "content": "५७",
93
  "lstrip": true,
94
  "normalized": false,
95
  "rstrip": true,
@@ -97,7 +105,15 @@
97
  "special": false
98
  },
99
  "14": {
100
- "content": "३१",
 
 
 
 
 
 
 
 
101
  "lstrip": true,
102
  "normalized": false,
103
  "rstrip": true,
@@ -105,7 +121,7 @@
105
  "special": false
106
  },
107
  "16": {
108
- "content": "५५",
109
  "lstrip": true,
110
  "normalized": false,
111
  "rstrip": true,
@@ -113,7 +129,7 @@
113
  "special": false
114
  },
115
  "17": {
116
- "content": "२४",
117
  "lstrip": true,
118
  "normalized": false,
119
  "rstrip": true,
@@ -121,15 +137,15 @@
121
  "special": false
122
  },
123
  "18": {
124
- "content": "५८",
125
  "lstrip": true,
126
  "normalized": false,
127
  "rstrip": true,
128
  "single_word": false,
129
  "special": false
130
  },
131
- "20": {
132
- "content": "१५",
133
  "lstrip": true,
134
  "normalized": false,
135
  "rstrip": true,
@@ -137,7 +153,7 @@
137
  "special": false
138
  },
139
  "21": {
140
- "content": "६३",
141
  "lstrip": true,
142
  "normalized": false,
143
  "rstrip": true,
@@ -145,7 +161,7 @@
145
  "special": false
146
  },
147
  "22": {
148
- "content": "९२",
149
  "lstrip": true,
150
  "normalized": false,
151
  "rstrip": true,
@@ -153,7 +169,7 @@
153
  "special": false
154
  },
155
  "23": {
156
- "content": "९७",
157
  "lstrip": true,
158
  "normalized": false,
159
  "rstrip": true,
@@ -161,7 +177,7 @@
161
  "special": false
162
  },
163
  "24": {
164
- "content": "७६",
165
  "lstrip": true,
166
  "normalized": false,
167
  "rstrip": true,
@@ -169,7 +185,7 @@
169
  "special": false
170
  },
171
  "25": {
172
- "content": "४६",
173
  "lstrip": true,
174
  "normalized": false,
175
  "rstrip": true,
@@ -177,15 +193,7 @@
177
  "special": false
178
  },
179
  "26": {
180
- "content": "४४",
181
- "lstrip": true,
182
- "normalized": false,
183
- "rstrip": true,
184
- "single_word": false,
185
- "special": false
186
- },
187
- "27": {
188
- "content": "८४",
189
  "lstrip": true,
190
  "normalized": false,
191
  "rstrip": true,
@@ -193,7 +201,7 @@
193
  "special": false
194
  },
195
  "28": {
196
- "content": "९०",
197
  "lstrip": true,
198
  "normalized": false,
199
  "rstrip": true,
@@ -201,7 +209,7 @@
201
  "special": false
202
  },
203
  "29": {
204
- "content": "१४",
205
  "lstrip": true,
206
  "normalized": false,
207
  "rstrip": true,
@@ -209,7 +217,7 @@
209
  "special": false
210
  },
211
  "30": {
212
- "content": "३३",
213
  "lstrip": true,
214
  "normalized": false,
215
  "rstrip": true,
@@ -217,7 +225,7 @@
217
  "special": false
218
  },
219
  "31": {
220
- "content": "५२",
221
  "lstrip": true,
222
  "normalized": false,
223
  "rstrip": true,
@@ -225,7 +233,7 @@
225
  "special": false
226
  },
227
  "32": {
228
- "content": "९६",
229
  "lstrip": true,
230
  "normalized": false,
231
  "rstrip": true,
@@ -233,7 +241,7 @@
233
  "special": false
234
  },
235
  "33": {
236
- "content": "८५",
237
  "lstrip": true,
238
  "normalized": false,
239
  "rstrip": true,
@@ -241,23 +249,23 @@
241
  "special": false
242
  },
243
  "34": {
244
- "content": "८६",
245
  "lstrip": true,
246
  "normalized": false,
247
  "rstrip": true,
248
  "single_word": false,
249
  "special": false
250
  },
251
- "36": {
252
- "content": "१०",
253
  "lstrip": true,
254
  "normalized": false,
255
  "rstrip": true,
256
  "single_word": false,
257
  "special": false
258
  },
259
- "37": {
260
- "content": "१२",
261
  "lstrip": true,
262
  "normalized": false,
263
  "rstrip": true,
@@ -265,15 +273,7 @@
265
  "special": false
266
  },
267
  "38": {
268
- "content": "७३",
269
- "lstrip": true,
270
- "normalized": false,
271
- "rstrip": true,
272
- "single_word": false,
273
- "special": false
274
- },
275
- "39": {
276
- "content": "११",
277
  "lstrip": true,
278
  "normalized": false,
279
  "rstrip": true,
@@ -281,39 +281,39 @@
281
  "special": false
282
  },
283
  "40": {
284
- "content": "१६",
285
  "lstrip": true,
286
  "normalized": false,
287
  "rstrip": true,
288
  "single_word": false,
289
  "special": false
290
  },
291
- "41": {
292
- "content": "६६",
293
  "lstrip": true,
294
  "normalized": false,
295
  "rstrip": true,
296
  "single_word": false,
297
  "special": false
298
  },
299
- "42": {
300
- "content": "३५",
301
  "lstrip": true,
302
  "normalized": false,
303
  "rstrip": true,
304
  "single_word": false,
305
  "special": false
306
  },
307
- "43": {
308
- "content": "३०",
309
  "lstrip": true,
310
  "normalized": false,
311
  "rstrip": true,
312
  "single_word": false,
313
  "special": false
314
  },
315
- "44": {
316
- "content": "३९",
317
  "lstrip": true,
318
  "normalized": false,
319
  "rstrip": true,
@@ -321,7 +321,7 @@
321
  "special": false
322
  },
323
  "46": {
324
- "content": "७४",
325
  "lstrip": true,
326
  "normalized": false,
327
  "rstrip": true,
@@ -329,39 +329,39 @@
329
  "special": false
330
  },
331
  "47": {
332
- "content": "६७",
333
  "lstrip": true,
334
  "normalized": false,
335
  "rstrip": true,
336
  "single_word": false,
337
  "special": false
338
  },
339
- "48": {
340
- "content": "५९",
341
  "lstrip": true,
342
  "normalized": false,
343
  "rstrip": true,
344
  "single_word": false,
345
  "special": false
346
  },
347
- "49": {
348
- "content": "९८",
349
  "lstrip": true,
350
  "normalized": false,
351
  "rstrip": true,
352
  "single_word": false,
353
  "special": false
354
  },
355
- "50": {
356
- "content": "५६",
357
  "lstrip": true,
358
  "normalized": false,
359
  "rstrip": true,
360
  "single_word": false,
361
  "special": false
362
  },
363
- "51": {
364
- "content": "१००",
365
  "lstrip": true,
366
  "normalized": false,
367
  "rstrip": true,
@@ -369,7 +369,7 @@
369
  "special": false
370
  },
371
  "53": {
372
- "content": "६०",
373
  "lstrip": true,
374
  "normalized": false,
375
  "rstrip": true,
@@ -377,7 +377,7 @@
377
  "special": false
378
  },
379
  "54": {
380
- "content": "८९",
381
  "lstrip": true,
382
  "normalized": false,
383
  "rstrip": true,
@@ -385,7 +385,15 @@
385
  "special": false
386
  },
387
  "55": {
388
- "content": "३७",
 
 
 
 
 
 
 
 
389
  "lstrip": true,
390
  "normalized": false,
391
  "rstrip": true,
@@ -393,7 +401,7 @@
393
  "special": false
394
  },
395
  "57": {
396
- "content": "५४",
397
  "lstrip": true,
398
  "normalized": false,
399
  "rstrip": true,
@@ -401,7 +409,7 @@
401
  "special": false
402
  },
403
  "58": {
404
- "content": "१३",
405
  "lstrip": true,
406
  "normalized": false,
407
  "rstrip": true,
@@ -409,7 +417,7 @@
409
  "special": false
410
  },
411
  "59": {
412
- "content": "५३",
413
  "lstrip": true,
414
  "normalized": false,
415
  "rstrip": true,
@@ -417,7 +425,7 @@
417
  "special": false
418
  },
419
  "60": {
420
- "content": "५१",
421
  "lstrip": true,
422
  "normalized": false,
423
  "rstrip": true,
@@ -425,7 +433,7 @@
425
  "special": false
426
  },
427
  "61": {
428
- "content": "६९",
429
  "lstrip": true,
430
  "normalized": false,
431
  "rstrip": true,
@@ -433,7 +441,7 @@
433
  "special": false
434
  },
435
  "62": {
436
- "content": "७५",
437
  "lstrip": true,
438
  "normalized": false,
439
  "rstrip": true,
@@ -441,7 +449,15 @@
441
  "special": false
442
  },
443
  "63": {
444
- "content": "८०",
 
 
 
 
 
 
 
 
445
  "lstrip": true,
446
  "normalized": false,
447
  "rstrip": true,
@@ -449,7 +465,7 @@
449
  "special": false
450
  },
451
  "65": {
452
- "content": "७२",
453
  "lstrip": true,
454
  "normalized": false,
455
  "rstrip": true,
@@ -457,7 +473,7 @@
457
  "special": false
458
  },
459
  "66": {
460
- "content": "६४",
461
  "lstrip": true,
462
  "normalized": false,
463
  "rstrip": true,
@@ -465,7 +481,7 @@
465
  "special": false
466
  },
467
  "67": {
468
- "content": "८२",
469
  "lstrip": true,
470
  "normalized": false,
471
  "rstrip": true,
@@ -473,7 +489,7 @@
473
  "special": false
474
  },
475
  "68": {
476
- "content": "४३",
477
  "lstrip": true,
478
  "normalized": false,
479
  "rstrip": true,
@@ -481,7 +497,7 @@
481
  "special": false
482
  },
483
  "70": {
484
- "content": "९४",
485
  "lstrip": true,
486
  "normalized": false,
487
  "rstrip": true,
@@ -489,7 +505,7 @@
489
  "special": false
490
  },
491
  "71": {
492
- "content": "१९",
493
  "lstrip": true,
494
  "normalized": false,
495
  "rstrip": true,
@@ -497,7 +513,7 @@
497
  "special": false
498
  },
499
  "72": {
500
- "content": "२१",
501
  "lstrip": true,
502
  "normalized": false,
503
  "rstrip": true,
@@ -505,15 +521,7 @@
505
  "special": false
506
  },
507
  "73": {
508
- "content": "४२",
509
- "lstrip": true,
510
- "normalized": false,
511
- "rstrip": true,
512
- "single_word": false,
513
- "special": false
514
- },
515
- "74": {
516
- "content": "२६",
517
  "lstrip": true,
518
  "normalized": false,
519
  "rstrip": true,
@@ -521,7 +529,7 @@
521
  "special": false
522
  },
523
  "75": {
524
- "content": "६२",
525
  "lstrip": true,
526
  "normalized": false,
527
  "rstrip": true,
@@ -529,7 +537,7 @@
529
  "special": false
530
  },
531
  "76": {
532
- "content": "९१",
533
  "lstrip": true,
534
  "normalized": false,
535
  "rstrip": true,
@@ -537,7 +545,7 @@
537
  "special": false
538
  },
539
  "78": {
540
- "content": "९३",
541
  "lstrip": true,
542
  "normalized": false,
543
  "rstrip": true,
@@ -545,7 +553,7 @@
545
  "special": false
546
  },
547
  "79": {
548
- "content": "२८",
549
  "lstrip": true,
550
  "normalized": false,
551
  "rstrip": true,
@@ -553,7 +561,7 @@
553
  "special": false
554
  },
555
  "80": {
556
- "content": "५०",
557
  "lstrip": true,
558
  "normalized": false,
559
  "rstrip": true,
@@ -561,7 +569,7 @@
561
  "special": false
562
  },
563
  "81": {
564
- "content": "३२",
565
  "lstrip": true,
566
  "normalized": false,
567
  "rstrip": true,
@@ -569,7 +577,7 @@
569
  "special": false
570
  },
571
  "82": {
572
- "content": "२२",
573
  "lstrip": true,
574
  "normalized": false,
575
  "rstrip": true,
@@ -577,7 +585,7 @@
577
  "special": false
578
  },
579
  "83": {
580
- "content": "४०",
581
  "lstrip": true,
582
  "normalized": false,
583
  "rstrip": true,
@@ -585,7 +593,7 @@
585
  "special": false
586
  },
587
  "84": {
588
- "content": "७१",
589
  "lstrip": true,
590
  "normalized": false,
591
  "rstrip": true,
@@ -593,7 +601,7 @@
593
  "special": false
594
  },
595
  "85": {
596
- "content": "८१",
597
  "lstrip": true,
598
  "normalized": false,
599
  "rstrip": true,
@@ -609,7 +617,7 @@
609
  "special": false
610
  },
611
  "87": {
612
- "content": "७९",
613
  "lstrip": true,
614
  "normalized": false,
615
  "rstrip": true,
@@ -617,7 +625,7 @@
617
  "special": false
618
  },
619
  "88": {
620
- "content": "३४",
621
  "lstrip": true,
622
  "normalized": false,
623
  "rstrip": true,
@@ -625,7 +633,7 @@
625
  "special": false
626
  },
627
  "89": {
628
- "content": "२०",
629
  "lstrip": true,
630
  "normalized": false,
631
  "rstrip": true,
@@ -633,7 +641,7 @@
633
  "special": false
634
  },
635
  "90": {
636
- "content": "३६",
637
  "lstrip": true,
638
  "normalized": false,
639
  "rstrip": true,
@@ -641,7 +649,7 @@
641
  "special": false
642
  },
643
  "91": {
644
- "content": "७७",
645
  "lstrip": true,
646
  "normalized": false,
647
  "rstrip": true,
@@ -649,7 +657,7 @@
649
  "special": false
650
  },
651
  "92": {
652
- "content": "६५",
653
  "lstrip": true,
654
  "normalized": false,
655
  "rstrip": true,
@@ -657,7 +665,7 @@
657
  "special": false
658
  },
659
  "93": {
660
- "content": "७०",
661
  "lstrip": true,
662
  "normalized": false,
663
  "rstrip": true,
@@ -665,7 +673,7 @@
665
  "special": false
666
  },
667
  "94": {
668
- "content": "७८",
669
  "lstrip": true,
670
  "normalized": false,
671
  "rstrip": true,
@@ -673,7 +681,7 @@
673
  "special": false
674
  },
675
  "95": {
676
- "content": "१७",
677
  "lstrip": true,
678
  "normalized": false,
679
  "rstrip": true,
@@ -681,7 +689,7 @@
681
  "special": false
682
  },
683
  "96": {
684
- "content": "२७",
685
  "lstrip": true,
686
  "normalized": false,
687
  "rstrip": true,
@@ -689,15 +697,7 @@
689
  "special": false
690
  },
691
  "97": {
692
- "content": "८३",
693
- "lstrip": true,
694
- "normalized": false,
695
- "rstrip": true,
696
- "single_word": false,
697
- "special": false
698
- },
699
- "98": {
700
- "content": "४१",
701
  "lstrip": true,
702
  "normalized": false,
703
  "rstrip": true,
@@ -705,7 +705,7 @@
705
  "special": false
706
  },
707
  "99": {
708
- "content": "१८",
709
  "lstrip": true,
710
  "normalized": false,
711
  "rstrip": true,
@@ -713,7 +713,7 @@
713
  "special": false
714
  },
715
  "100": {
716
- "content": "३८",
717
  "lstrip": true,
718
  "normalized": false,
719
  "rstrip": true,
@@ -721,7 +721,7 @@
721
  "special": false
722
  },
723
  "101": {
724
- "content": "९९",
725
  "lstrip": true,
726
  "normalized": false,
727
  "rstrip": true,
 
1
  {
2
  "added_tokens_decoder": {
3
  "1": {
4
+ "content": "१६",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
 
9
  "special": false
10
  },
11
  "2": {
12
+ "content": "३७",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
 
17
  "special": false
18
  },
19
  "3": {
20
+ "content": "५६",
21
  "lstrip": true,
22
  "normalized": false,
23
  "rstrip": true,
 
25
  "special": false
26
  },
27
  "4": {
28
+ "content": "२७",
29
  "lstrip": true,
30
  "normalized": false,
31
  "rstrip": true,
 
33
  "special": false
34
  },
35
  "5": {
36
+ "content": "५४",
37
  "lstrip": true,
38
  "normalized": false,
39
  "rstrip": true,
 
41
  "special": false
42
  },
43
  "6": {
44
+ "content": "८९",
45
  "lstrip": true,
46
  "normalized": false,
47
  "rstrip": true,
 
49
  "special": false
50
  },
51
  "7": {
52
+ "content": "२५",
53
  "lstrip": true,
54
  "normalized": false,
55
  "rstrip": true,
 
57
  "special": false
58
  },
59
  "8": {
60
+ "content": "४०",
61
  "lstrip": true,
62
  "normalized": false,
63
  "rstrip": true,
 
65
  "special": false
66
  },
67
  "9": {
68
+ "content": "६६",
69
  "lstrip": true,
70
  "normalized": false,
71
  "rstrip": true,
 
73
  "special": false
74
  },
75
  "10": {
76
+ "content": "९०",
77
  "lstrip": true,
78
  "normalized": false,
79
  "rstrip": true,
80
  "single_word": false,
81
  "special": false
82
  },
83
+ "11": {
84
  "content": "४५",
85
  "lstrip": true,
86
  "normalized": false,
 
88
  "single_word": false,
89
  "special": false
90
  },
91
+ "12": {
92
+ "content": "९४",
93
+ "lstrip": true,
94
+ "normalized": false,
95
+ "rstrip": true,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
  "13": {
100
+ "content": "४४",
101
  "lstrip": true,
102
  "normalized": false,
103
  "rstrip": true,
 
105
  "special": false
106
  },
107
  "14": {
108
+ "content": "५०",
109
+ "lstrip": true,
110
+ "normalized": false,
111
+ "rstrip": true,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "15": {
116
+ "content": "२९",
117
  "lstrip": true,
118
  "normalized": false,
119
  "rstrip": true,
 
121
  "special": false
122
  },
123
  "16": {
124
+ "content": "८२",
125
  "lstrip": true,
126
  "normalized": false,
127
  "rstrip": true,
 
129
  "special": false
130
  },
131
  "17": {
132
+ "content": "८८",
133
  "lstrip": true,
134
  "normalized": false,
135
  "rstrip": true,
 
137
  "special": false
138
  },
139
  "18": {
140
+ "content": "६२",
141
  "lstrip": true,
142
  "normalized": false,
143
  "rstrip": true,
144
  "single_word": false,
145
  "special": false
146
  },
147
+ "19": {
148
+ "content": "१३",
149
  "lstrip": true,
150
  "normalized": false,
151
  "rstrip": true,
 
153
  "special": false
154
  },
155
  "21": {
156
+ "content": "१९",
157
  "lstrip": true,
158
  "normalized": false,
159
  "rstrip": true,
 
161
  "special": false
162
  },
163
  "22": {
164
+ "content": "५९",
165
  "lstrip": true,
166
  "normalized": false,
167
  "rstrip": true,
 
169
  "special": false
170
  },
171
  "23": {
172
+ "content": "६०",
173
  "lstrip": true,
174
  "normalized": false,
175
  "rstrip": true,
 
177
  "special": false
178
  },
179
  "24": {
180
+ "content": "४७",
181
  "lstrip": true,
182
  "normalized": false,
183
  "rstrip": true,
 
185
  "special": false
186
  },
187
  "25": {
188
+ "content": "५२",
189
  "lstrip": true,
190
  "normalized": false,
191
  "rstrip": true,
 
193
  "special": false
194
  },
195
  "26": {
196
+ "content": "३५",
 
 
 
 
 
 
 
 
197
  "lstrip": true,
198
  "normalized": false,
199
  "rstrip": true,
 
201
  "special": false
202
  },
203
  "28": {
204
+ "content": "९९",
205
  "lstrip": true,
206
  "normalized": false,
207
  "rstrip": true,
 
209
  "special": false
210
  },
211
  "29": {
212
+ "content": "२६",
213
  "lstrip": true,
214
  "normalized": false,
215
  "rstrip": true,
 
217
  "special": false
218
  },
219
  "30": {
220
+ "content": "७०",
221
  "lstrip": true,
222
  "normalized": false,
223
  "rstrip": true,
 
225
  "special": false
226
  },
227
  "31": {
228
+ "content": "३२",
229
  "lstrip": true,
230
  "normalized": false,
231
  "rstrip": true,
 
233
  "special": false
234
  },
235
  "32": {
236
+ "content": "७६",
237
  "lstrip": true,
238
  "normalized": false,
239
  "rstrip": true,
 
241
  "special": false
242
  },
243
  "33": {
244
+ "content": "४८",
245
  "lstrip": true,
246
  "normalized": false,
247
  "rstrip": true,
 
249
  "special": false
250
  },
251
  "34": {
252
+ "content": "९७",
253
  "lstrip": true,
254
  "normalized": false,
255
  "rstrip": true,
256
  "single_word": false,
257
  "special": false
258
  },
259
+ "35": {
260
+ "content": "२०",
261
  "lstrip": true,
262
  "normalized": false,
263
  "rstrip": true,
264
  "single_word": false,
265
  "special": false
266
  },
267
+ "36": {
268
+ "content": "९२",
269
  "lstrip": true,
270
  "normalized": false,
271
  "rstrip": true,
 
273
  "special": false
274
  },
275
  "38": {
276
+ "content": "१८",
 
 
 
 
 
 
 
 
277
  "lstrip": true,
278
  "normalized": false,
279
  "rstrip": true,
 
281
  "special": false
282
  },
283
  "40": {
284
+ "content": "८१",
285
  "lstrip": true,
286
  "normalized": false,
287
  "rstrip": true,
288
  "single_word": false,
289
  "special": false
290
  },
291
+ "42": {
292
+ "content": "९३",
293
  "lstrip": true,
294
  "normalized": false,
295
  "rstrip": true,
296
  "single_word": false,
297
  "special": false
298
  },
299
+ "43": {
300
+ "content": "६९",
301
  "lstrip": true,
302
  "normalized": false,
303
  "rstrip": true,
304
  "single_word": false,
305
  "special": false
306
  },
307
+ "44": {
308
+ "content": "५३",
309
  "lstrip": true,
310
  "normalized": false,
311
  "rstrip": true,
312
  "single_word": false,
313
  "special": false
314
  },
315
+ "45": {
316
+ "content": "६१",
317
  "lstrip": true,
318
  "normalized": false,
319
  "rstrip": true,
 
321
  "special": false
322
  },
323
  "46": {
324
+ "content": "८३",
325
  "lstrip": true,
326
  "normalized": false,
327
  "rstrip": true,
 
329
  "special": false
330
  },
331
  "47": {
332
+ "content": "९५",
333
  "lstrip": true,
334
  "normalized": false,
335
  "rstrip": true,
336
  "single_word": false,
337
  "special": false
338
  },
339
+ "49": {
340
+ "content": "६४",
341
  "lstrip": true,
342
  "normalized": false,
343
  "rstrip": true,
344
  "single_word": false,
345
  "special": false
346
  },
347
+ "50": {
348
+ "content": "६५",
349
  "lstrip": true,
350
  "normalized": false,
351
  "rstrip": true,
352
  "single_word": false,
353
  "special": false
354
  },
355
+ "51": {
356
+ "content": "७३",
357
  "lstrip": true,
358
  "normalized": false,
359
  "rstrip": true,
360
  "single_word": false,
361
  "special": false
362
  },
363
+ "52": {
364
+ "content": "८६",
365
  "lstrip": true,
366
  "normalized": false,
367
  "rstrip": true,
 
369
  "special": false
370
  },
371
  "53": {
372
+ "content": "७२",
373
  "lstrip": true,
374
  "normalized": false,
375
  "rstrip": true,
 
377
  "special": false
378
  },
379
  "54": {
380
+ "content": "३३",
381
  "lstrip": true,
382
  "normalized": false,
383
  "rstrip": true,
 
385
  "special": false
386
  },
387
  "55": {
388
+ "content": "२८",
389
+ "lstrip": true,
390
+ "normalized": false,
391
+ "rstrip": true,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "56": {
396
+ "content": "१०",
397
  "lstrip": true,
398
  "normalized": false,
399
  "rstrip": true,
 
401
  "special": false
402
  },
403
  "57": {
404
+ "content": "९१",
405
  "lstrip": true,
406
  "normalized": false,
407
  "rstrip": true,
 
409
  "special": false
410
  },
411
  "58": {
412
+ "content": "४२",
413
  "lstrip": true,
414
  "normalized": false,
415
  "rstrip": true,
 
417
  "special": false
418
  },
419
  "59": {
420
+ "content": "४६",
421
  "lstrip": true,
422
  "normalized": false,
423
  "rstrip": true,
 
425
  "special": false
426
  },
427
  "60": {
428
+ "content": "११",
429
  "lstrip": true,
430
  "normalized": false,
431
  "rstrip": true,
 
433
  "special": false
434
  },
435
  "61": {
436
+ "content": "७५",
437
  "lstrip": true,
438
  "normalized": false,
439
  "rstrip": true,
 
441
  "special": false
442
  },
443
  "62": {
444
+ "content": "३०",
445
  "lstrip": true,
446
  "normalized": false,
447
  "rstrip": true,
 
449
  "special": false
450
  },
451
  "63": {
452
+ "content": "५८",
453
+ "lstrip": true,
454
+ "normalized": false,
455
+ "rstrip": true,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "64": {
460
+ "content": "४३",
461
  "lstrip": true,
462
  "normalized": false,
463
  "rstrip": true,
 
465
  "special": false
466
  },
467
  "65": {
468
+ "content": "७८",
469
  "lstrip": true,
470
  "normalized": false,
471
  "rstrip": true,
 
473
  "special": false
474
  },
475
  "66": {
476
+ "content": "५१",
477
  "lstrip": true,
478
  "normalized": false,
479
  "rstrip": true,
 
481
  "special": false
482
  },
483
  "67": {
484
+ "content": "५५",
485
  "lstrip": true,
486
  "normalized": false,
487
  "rstrip": true,
 
489
  "special": false
490
  },
491
  "68": {
492
+ "content": "२४",
493
  "lstrip": true,
494
  "normalized": false,
495
  "rstrip": true,
 
497
  "special": false
498
  },
499
  "70": {
500
+ "content": "८४",
501
  "lstrip": true,
502
  "normalized": false,
503
  "rstrip": true,
 
505
  "special": false
506
  },
507
  "71": {
508
+ "content": "७७",
509
  "lstrip": true,
510
  "normalized": false,
511
  "rstrip": true,
 
513
  "special": false
514
  },
515
  "72": {
516
+ "content": "६३",
517
  "lstrip": true,
518
  "normalized": false,
519
  "rstrip": true,
 
521
  "special": false
522
  },
523
  "73": {
524
+ "content": "८७",
 
 
 
 
 
 
 
 
525
  "lstrip": true,
526
  "normalized": false,
527
  "rstrip": true,
 
529
  "special": false
530
  },
531
  "75": {
532
+ "content": "६७",
533
  "lstrip": true,
534
  "normalized": false,
535
  "rstrip": true,
 
537
  "special": false
538
  },
539
  "76": {
540
+ "content": "२२",
541
  "lstrip": true,
542
  "normalized": false,
543
  "rstrip": true,
 
545
  "special": false
546
  },
547
  "78": {
548
+ "content": "१५",
549
  "lstrip": true,
550
  "normalized": false,
551
  "rstrip": true,
 
553
  "special": false
554
  },
555
  "79": {
556
+ "content": "१००",
557
  "lstrip": true,
558
  "normalized": false,
559
  "rstrip": true,
 
561
  "special": false
562
  },
563
  "80": {
564
+ "content": "७१",
565
  "lstrip": true,
566
  "normalized": false,
567
  "rstrip": true,
 
569
  "special": false
570
  },
571
  "81": {
572
+ "content": "३८",
573
  "lstrip": true,
574
  "normalized": false,
575
  "rstrip": true,
 
577
  "special": false
578
  },
579
  "82": {
580
+ "content": "३१",
581
  "lstrip": true,
582
  "normalized": false,
583
  "rstrip": true,
 
585
  "special": false
586
  },
587
  "83": {
588
+ "content": "९८",
589
  "lstrip": true,
590
  "normalized": false,
591
  "rstrip": true,
 
593
  "special": false
594
  },
595
  "84": {
596
+ "content": "५७",
597
  "lstrip": true,
598
  "normalized": false,
599
  "rstrip": true,
 
601
  "special": false
602
  },
603
  "85": {
604
+ "content": "२१",
605
  "lstrip": true,
606
  "normalized": false,
607
  "rstrip": true,
 
617
  "special": false
618
  },
619
  "87": {
620
+ "content": "१४",
621
  "lstrip": true,
622
  "normalized": false,
623
  "rstrip": true,
 
625
  "special": false
626
  },
627
  "88": {
628
+ "content": "८०",
629
  "lstrip": true,
630
  "normalized": false,
631
  "rstrip": true,
 
633
  "special": false
634
  },
635
  "89": {
636
+ "content": "२३",
637
  "lstrip": true,
638
  "normalized": false,
639
  "rstrip": true,
 
641
  "special": false
642
  },
643
  "90": {
644
+ "content": "७९",
645
  "lstrip": true,
646
  "normalized": false,
647
  "rstrip": true,
 
649
  "special": false
650
  },
651
  "91": {
652
+ "content": "३४",
653
  "lstrip": true,
654
  "normalized": false,
655
  "rstrip": true,
 
657
  "special": false
658
  },
659
  "92": {
660
+ "content": "३६",
661
  "lstrip": true,
662
  "normalized": false,
663
  "rstrip": true,
 
665
  "special": false
666
  },
667
  "93": {
668
+ "content": "४१",
669
  "lstrip": true,
670
  "normalized": false,
671
  "rstrip": true,
 
673
  "special": false
674
  },
675
  "94": {
676
+ "content": "१७",
677
  "lstrip": true,
678
  "normalized": false,
679
  "rstrip": true,
 
681
  "special": false
682
  },
683
  "95": {
684
+ "content": "१२",
685
  "lstrip": true,
686
  "normalized": false,
687
  "rstrip": true,
 
689
  "special": false
690
  },
691
  "96": {
692
+ "content": "७४",
693
  "lstrip": true,
694
  "normalized": false,
695
  "rstrip": true,
 
697
  "special": false
698
  },
699
  "97": {
700
+ "content": "९६",
 
 
 
 
 
 
 
 
701
  "lstrip": true,
702
  "normalized": false,
703
  "rstrip": true,
 
705
  "special": false
706
  },
707
  "99": {
708
+ "content": "३९",
709
  "lstrip": true,
710
  "normalized": false,
711
  "rstrip": true,
 
713
  "special": false
714
  },
715
  "100": {
716
+ "content": "८५",
717
  "lstrip": true,
718
  "normalized": false,
719
  "rstrip": true,
 
721
  "special": false
722
  },
723
  "101": {
724
+ "content": "६८",
725
  "lstrip": true,
726
  "normalized": false,
727
  "rstrip": true,
vocab.json CHANGED
@@ -1,106 +1,106 @@
1
  {
2
  "[PAD]": 103,
3
  "[UNK]": 102,
4
- "|": 19,
5
- "०": 69,
6
- "१": 52,
7
- "१०": 36,
8
- "१००": 51,
9
- "११": 39,
10
- "१२": 37,
11
- "१३": 58,
12
- "१४": 29,
13
- "१५": 20,
14
- "१६": 40,
15
- "१७": 95,
16
- "१८": 99,
17
- "१९": 71,
18
- "२": 35,
19
- "२०": 89,
20
- "२१": 72,
21
- "२२": 82,
22
- "२३": 5,
23
- "२४": 17,
24
- "२५": 8,
25
- "२६": 74,
26
- "२७": 96,
27
- "२८": 79,
28
- "२९": 3,
29
- "३": 0,
30
- "३०": 43,
31
- "३१": 14,
32
- "३२": 81,
33
- "३३": 30,
34
- "३४": 88,
35
- "३५": 42,
36
- "३६": 90,
37
- "३७": 55,
38
- "३८": 100,
39
- "३९": 44,
40
- "४": 45,
41
- "४०": 83,
42
- "४१": 98,
43
- "४२": 73,
44
- "४३": 68,
45
- "४४": 26,
46
- "४५": 12,
47
- "४६": 25,
48
- "४७": 7,
49
- "४८": 1,
50
  "४९": 86,
51
- "५": 77,
52
- "५०": 80,
53
- "५१": 60,
54
- "५२": 31,
55
- "५३": 59,
56
- "५४": 57,
57
- "५५": 16,
58
- "५६": 50,
59
- "५७": 13,
60
- "५८": 18,
61
- "५९": 48,
62
- "६": 56,
63
- "६०": 53,
64
- "६१": 6,
65
- "६२": 75,
66
- "६३": 21,
67
- "६४": 66,
68
- "६५": 92,
69
- "६६": 41,
70
- "६७": 47,
71
- "६८": 9,
72
- "६९": 61,
73
- "७": 64,
74
- "७०": 93,
75
- "७१": 84,
76
- "७२": 65,
77
- "७३": 38,
78
- "७४": 46,
79
- "७५": 62,
80
- "७६": 24,
81
- "७७": 91,
82
- "७८": 94,
83
- "७९": 87,
84
- "८": 11,
85
- "८०": 63,
86
- "८१": 85,
87
- "८२": 67,
88
- "८३": 97,
89
- "८४": 27,
90
- "८५": 33,
91
- "८६": 34,
92
- "८७": 10,
93
- "८८": 2,
94
- "८९": 54,
95
- "९": 15,
96
- "९०": 28,
97
- "९१": 76,
98
- "९२": 22,
99
- "९३": 78,
100
- "९४": 70,
101
- "९५": 4,
102
- "९६": 32,
103
- "९७": 23,
104
- "९८": 49,
105
- "९९": 101
106
  }
 
1
  {
2
  "[PAD]": 103,
3
  "[UNK]": 102,
4
+ "|": 0,
5
+ "०": 41,
6
+ "१": 77,
7
+ "१०": 56,
8
+ "१००": 79,
9
+ "११": 60,
10
+ "१२": 95,
11
+ "१३": 19,
12
+ "१४": 87,
13
+ "१५": 78,
14
+ "१६": 1,
15
+ "१७": 94,
16
+ "१८": 38,
17
+ "१९": 21,
18
+ "२": 37,
19
+ "२०": 35,
20
+ "२१": 85,
21
+ "२२": 76,
22
+ "२३": 89,
23
+ "२४": 68,
24
+ "२५": 7,
25
+ "२६": 29,
26
+ "२७": 4,
27
+ "२८": 55,
28
+ "२९": 15,
29
+ "३": 74,
30
+ "३०": 62,
31
+ "३१": 82,
32
+ "३२": 31,
33
+ "३३": 54,
34
+ "३४": 91,
35
+ "३५": 26,
36
+ "३६": 92,
37
+ "३७": 2,
38
+ "३८": 81,
39
+ "३९": 99,
40
+ "४": 20,
41
+ "४०": 8,
42
+ "४१": 93,
43
+ "४२": 58,
44
+ "४३": 64,
45
+ "४४": 13,
46
+ "४५": 11,
47
+ "४६": 59,
48
+ "४७": 24,
49
+ "४८": 33,
50
  "४९": 86,
51
+ "५": 98,
52
+ "५०": 14,
53
+ "५१": 66,
54
+ "५२": 25,
55
+ "५३": 44,
56
+ "५४": 5,
57
+ "५५": 67,
58
+ "५६": 3,
59
+ "५७": 84,
60
+ "५८": 63,
61
+ "५९": 22,
62
+ "६": 69,
63
+ "६०": 23,
64
+ "६१": 45,
65
+ "६२": 18,
66
+ "६३": 72,
67
+ "६४": 49,
68
+ "६५": 50,
69
+ "६६": 9,
70
+ "६७": 75,
71
+ "६८": 101,
72
+ "६९": 43,
73
+ "७": 39,
74
+ "७०": 30,
75
+ "७१": 80,
76
+ "७२": 53,
77
+ "७३": 51,
78
+ "७४": 96,
79
+ "७५": 61,
80
+ "७६": 32,
81
+ "७७": 71,
82
+ "७८": 65,
83
+ "७९": 90,
84
+ "८": 27,
85
+ "८०": 88,
86
+ "८१": 40,
87
+ "८२": 16,
88
+ "८३": 46,
89
+ "८४": 70,
90
+ "८५": 100,
91
+ "८६": 52,
92
+ "८७": 73,
93
+ "८८": 17,
94
+ "८९": 6,
95
+ "९": 48,
96
+ "९०": 10,
97
+ "९१": 57,
98
+ "९२": 36,
99
+ "९३": 42,
100
+ "९४": 12,
101
+ "९५": 47,
102
+ "९६": 97,
103
+ "९७": 34,
104
+ "९८": 83,
105
+ "९९": 28
106
  }