GinnM committed on
Commit
6492c5d
·
1 Parent(s): bf3b1d7

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +9 -0
  2. tokenizer.json +940 -0
  3. tokenizer_config.json +5 -0
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<cls>",
3
+ "cls_token": "<cls>",
4
+ "eos_token": "<sep>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "<sep>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<pad>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<cls>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "<sep>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": null,
54
+ "post_processor": {
55
+ "type": "TemplateProcessing",
56
+ "single": [
57
+ {
58
+ "SpecialToken": {
59
+ "id": "<cls>",
60
+ "type_id": 0
61
+ }
62
+ },
63
+ {
64
+ "Sequence": {
65
+ "id": "A",
66
+ "type_id": 0
67
+ }
68
+ },
69
+ {
70
+ "SpecialToken": {
71
+ "id": "<sep>",
72
+ "type_id": 0
73
+ }
74
+ }
75
+ ],
76
+ "pair": [
77
+ {
78
+ "SpecialToken": {
79
+ "id": "<cls>",
80
+ "type_id": 0
81
+ }
82
+ },
83
+ {
84
+ "Sequence": {
85
+ "id": "A",
86
+ "type_id": 0
87
+ }
88
+ },
89
+ {
90
+ "SpecialToken": {
91
+ "id": "<sep>",
92
+ "type_id": 0
93
+ }
94
+ },
95
+ {
96
+ "Sequence": {
97
+ "id": "B",
98
+ "type_id": 1
99
+ }
100
+ },
101
+ {
102
+ "SpecialToken": {
103
+ "id": "<sep>",
104
+ "type_id": 1
105
+ }
106
+ }
107
+ ],
108
+ "special_tokens": {
109
+ "<cls>": {
110
+ "id": "<cls>",
111
+ "ids": [
112
+ 1
113
+ ],
114
+ "tokens": [
115
+ "<cls>"
116
+ ]
117
+ },
118
+ "<sep>": {
119
+ "id": "<sep>",
120
+ "ids": [
121
+ 2
122
+ ],
123
+ "tokens": [
124
+ "<sep>"
125
+ ]
126
+ }
127
+ }
128
+ },
129
+ "decoder": {
130
+ "type": "Metaspace",
131
+ "replacement": "▁",
132
+ "add_prefix_space": true
133
+ },
134
+ "model": {
135
+ "type": "Unigram",
136
+ "unk_id": 3,
137
+ "vocab": [
138
+ [
139
+ "<pad>",
140
+ 0.0
141
+ ],
142
+ [
143
+ "<cls>",
144
+ 0.0
145
+ ],
146
+ [
147
+ "<sep>",
148
+ 0.0
149
+ ],
150
+ [
151
+ "<unk>",
152
+ 0.0
153
+ ],
154
+ [
155
+ "<mask>",
156
+ 0.0
157
+ ],
158
+ [
159
+ "Y",
160
+ -3.1509086450638755
161
+ ],
162
+ [
163
+ "H",
164
+ -3.3896571454274156
165
+ ],
166
+ [
167
+ "M",
168
+ -3.419833263673423
169
+ ],
170
+ [
171
+ "Q",
172
+ -3.5122514279760804
173
+ ],
174
+ [
175
+ "F",
176
+ -3.5318715443351483
177
+ ],
178
+ [
179
+ "N",
180
+ -3.6735769356989
181
+ ],
182
+ [
183
+ "A",
184
+ -3.7004076405792734
185
+ ],
186
+ [
187
+ "L",
188
+ -3.7517736006002753
189
+ ],
190
+ [
191
+ "C",
192
+ -3.82533728855746
193
+ ],
194
+ [
195
+ "T",
196
+ -3.845094787136208
197
+ ],
198
+ [
199
+ "R",
200
+ -3.85120522720325
201
+ ],
202
+ [
203
+ "P",
204
+ -3.8944513102878826
205
+ ],
206
+ [
207
+ "W",
208
+ -3.910536407899921
209
+ ],
210
+ [
211
+ "K",
212
+ -3.9181905087306057
213
+ ],
214
+ [
215
+ "E",
216
+ -3.9392286405437424
217
+ ],
218
+ [
219
+ "G",
220
+ -3.9436969789665177
221
+ ],
222
+ [
223
+ "S",
224
+ -3.9513056815431504
225
+ ],
226
+ [
227
+ "V",
228
+ -3.9521272143039567
229
+ ],
230
+ [
231
+ "D",
232
+ -4.053606203549702
233
+ ],
234
+ [
235
+ "I",
236
+ -4.087833115224349
237
+ ],
238
+ [
239
+ "AA",
240
+ -4.764966670027674
241
+ ],
242
+ [
243
+ "LL",
244
+ -4.902886691904113
245
+ ],
246
+ [
247
+ "LA",
248
+ -5.08072623495822
249
+ ],
250
+ [
251
+ "AL",
252
+ -5.087271044534189
253
+ ],
254
+ [
255
+ "SS",
256
+ -5.121051536194255
257
+ ],
258
+ [
259
+ "LS",
260
+ -5.240240142812667
261
+ ],
262
+ [
263
+ "SL",
264
+ -5.246958672682522
265
+ ],
266
+ [
267
+ "GG",
268
+ -5.31104069359229
269
+ ],
270
+ [
271
+ "AG",
272
+ -5.327169120628199
273
+ ],
274
+ [
275
+ "VL",
276
+ -5.391035733278736
277
+ ],
278
+ [
279
+ "RL",
280
+ -5.396912703019062
281
+ ],
282
+ [
283
+ "AV",
284
+ -5.417974840306595
285
+ ],
286
+ [
287
+ "GL",
288
+ -5.438738874872717
289
+ ],
290
+ [
291
+ "LV",
292
+ -5.4539685415962555
293
+ ],
294
+ [
295
+ "SG",
296
+ -5.468530821013996
297
+ ],
298
+ [
299
+ "RR",
300
+ -5.469381686553186
301
+ ],
302
+ [
303
+ "EL",
304
+ -5.487314422405181
305
+ ],
306
+ [
307
+ "VA",
308
+ -5.500947343968374
309
+ ],
310
+ [
311
+ "LR",
312
+ -5.504465608087704
313
+ ],
314
+ [
315
+ "GA",
316
+ -5.508572577172163
317
+ ],
318
+ [
319
+ "SA",
320
+ -5.509497866677481
321
+ ],
322
+ [
323
+ "EE",
324
+ -5.509638117132358
325
+ ],
326
+ [
327
+ "LP",
328
+ -5.525125791450426
329
+ ],
330
+ [
331
+ "LG",
332
+ -5.531523151805965
333
+ ],
334
+ [
335
+ "TL",
336
+ -5.550336580253715
337
+ ],
338
+ [
339
+ "AS",
340
+ -5.555359709285966
341
+ ],
342
+ [
343
+ "VV",
344
+ -5.5822783949387205
345
+ ],
346
+ [
347
+ "LT",
348
+ -5.5949437276928276
349
+ ],
350
+ [
351
+ "LD",
352
+ -5.609175705941702
353
+ ],
354
+ [
355
+ "GS",
356
+ -5.615707589762941
357
+ ],
358
+ [
359
+ "TA",
360
+ -5.6212468269382665
361
+ ],
362
+ [
363
+ "LE",
364
+ -5.62152407771382
365
+ ],
366
+ [
367
+ "DL",
368
+ -5.625455355831432
369
+ ],
370
+ [
371
+ "KK",
372
+ -5.632789431683037
373
+ ],
374
+ [
375
+ "EA",
376
+ -5.635793018508904
377
+ ],
378
+ [
379
+ "IL",
380
+ -5.6365381446185285
381
+ ],
382
+ [
383
+ "GV",
384
+ -5.674808795328973
385
+ ],
386
+ [
387
+ "AR",
388
+ -5.681247789262706
389
+ ],
390
+ [
391
+ "SV",
392
+ -5.6919674499343085
393
+ ],
394
+ [
395
+ "RA",
396
+ -5.70230195746182
397
+ ],
398
+ [
399
+ "ST",
400
+ -5.705482521677933
401
+ ],
402
+ [
403
+ "AE",
404
+ -5.707097244257659
405
+ ],
406
+ [
407
+ "DG",
408
+ -5.7111206266490395
409
+ ],
410
+ [
411
+ "KL",
412
+ -5.714200191449681
413
+ ],
414
+ [
415
+ "PA",
416
+ -5.714888070036569
417
+ ],
418
+ [
419
+ "AT",
420
+ -5.7387016264645485
421
+ ],
422
+ [
423
+ "VS",
424
+ -5.740918194807335
425
+ ],
426
+ [
427
+ "DA",
428
+ -5.747346276581707
429
+ ],
430
+ [
431
+ "SP",
432
+ -5.750453122200705
433
+ ],
434
+ [
435
+ "IS",
436
+ -5.7575142466303255
437
+ ],
438
+ [
439
+ "GR",
440
+ -5.757690327764845
441
+ ],
442
+ [
443
+ "LI",
444
+ -5.76013806021334
445
+ ],
446
+ [
447
+ "TG",
448
+ -5.761512737903605
449
+ ],
450
+ [
451
+ "TV",
452
+ -5.764206078890442
453
+ ],
454
+ [
455
+ "IA",
456
+ -5.777844220177171
457
+ ],
458
+ [
459
+ "AD",
460
+ -5.780310267356683
461
+ ],
462
+ [
463
+ "TT",
464
+ -5.785480977584342
465
+ ],
466
+ [
467
+ "TS",
468
+ -5.791507570316087
469
+ ],
470
+ [
471
+ "LK",
472
+ -5.802866544052941
473
+ ],
474
+ [
475
+ "EK",
476
+ -5.804278043814918
477
+ ],
478
+ [
479
+ "GT",
480
+ -5.819460987630665
481
+ ],
482
+ [
483
+ "PP",
484
+ -5.821173747611947
485
+ ],
486
+ [
487
+ "KE",
488
+ -5.822033413758428
489
+ ],
490
+ [
491
+ "ER",
492
+ -5.852332647299409
493
+ ],
494
+ [
495
+ "SD",
496
+ -5.852838901980473
497
+ ],
498
+ [
499
+ "SI",
500
+ -5.8533832214242345
501
+ ],
502
+ [
503
+ "PS",
504
+ -5.854586170941655
505
+ ],
506
+ [
507
+ "AP",
508
+ -5.856858689030043
509
+ ],
510
+ [
511
+ "DE",
512
+ -5.867028022084829
513
+ ],
514
+ [
515
+ "GD",
516
+ -5.876378483403268
517
+ ],
518
+ [
519
+ "DD",
520
+ -5.876725852666299
521
+ ],
522
+ [
523
+ "NL",
524
+ -5.884448903872826
525
+ ],
526
+ [
527
+ "VT",
528
+ -5.88772386460313
529
+ ],
530
+ [
531
+ "PL",
532
+ -5.907796388921314
533
+ ],
534
+ [
535
+ "RS",
536
+ -5.909472604917513
537
+ ],
538
+ [
539
+ "TP",
540
+ -5.910221549171844
541
+ ],
542
+ [
543
+ "EI",
544
+ -5.916003495337167
545
+ ],
546
+ [
547
+ "SR",
548
+ -5.918245244245625
549
+ ],
550
+ [
551
+ "GE",
552
+ -5.918941142869885
553
+ ],
554
+ [
555
+ "VD",
556
+ -5.924154015576642
557
+ ],
558
+ [
559
+ "ID",
560
+ -5.927743633953453
561
+ ],
562
+ [
563
+ "EV",
564
+ -5.931109554224724
565
+ ],
566
+ [
567
+ "VG",
568
+ -5.936172020091348
569
+ ],
570
+ [
571
+ "VE",
572
+ -5.9407666092559595
573
+ ],
574
+ [
575
+ "GI",
576
+ -5.953825669400469
577
+ ],
578
+ [
579
+ "DV",
580
+ -5.964826749154037
581
+ ],
582
+ [
583
+ "KI",
584
+ -5.965011602414927
585
+ ],
586
+ [
587
+ "AI",
588
+ -5.966134585178457
589
+ ],
590
+ [
591
+ "IV",
592
+ -5.966562340591041
593
+ ],
594
+ [
595
+ "RV",
596
+ -5.968891258580745
597
+ ],
598
+ [
599
+ "FL",
600
+ -5.973183082312524
601
+ ],
602
+ [
603
+ "SE",
604
+ -5.978660102843701
605
+ ],
606
+ [
607
+ "IE",
608
+ -5.990855196049141
609
+ ],
610
+ [
611
+ "QL",
612
+ -5.994359870798313
613
+ ],
614
+ [
615
+ "PV",
616
+ -5.995163841048182
617
+ ],
618
+ [
619
+ "RE",
620
+ -5.9975055476878065
621
+ ],
622
+ [
623
+ "ED",
624
+ -6.015188056772153
625
+ ],
626
+ [
627
+ "KA",
628
+ -6.01643176845711
629
+ ],
630
+ [
631
+ "LF",
632
+ -6.0171694056170075
633
+ ],
634
+ [
635
+ "IG",
636
+ -6.020600912983243
637
+ ],
638
+ [
639
+ "RG",
640
+ -6.022579399372601
641
+ ],
642
+ [
643
+ "II",
644
+ -6.026894696516262
645
+ ],
646
+ [
647
+ "PG",
648
+ -6.027207909959376
649
+ ],
650
+ [
651
+ "VR",
652
+ -6.03105386262591
653
+ ],
654
+ [
655
+ "KS",
656
+ -6.039104198463601
657
+ ],
658
+ [
659
+ "IT",
660
+ -6.044874540149573
661
+ ],
662
+ [
663
+ "LQ",
664
+ -6.0719229057700765
665
+ ],
666
+ [
667
+ "VP",
668
+ -6.073861256741292
669
+ ],
670
+ [
671
+ "NG",
672
+ -6.087855724157491
673
+ ],
674
+ [
675
+ "DS",
676
+ -6.093824372411833
677
+ ],
678
+ [
679
+ "EG",
680
+ -6.095757478155381
681
+ ],
682
+ [
683
+ "GK",
684
+ -6.099067993197011
685
+ ],
686
+ [
687
+ "ES",
688
+ -6.101068768641547
689
+ ],
690
+ [
691
+ "TI",
692
+ -6.103630143225411
693
+ ],
694
+ [
695
+ "KT",
696
+ -6.108252739228014
697
+ ],
698
+ [
699
+ "ET",
700
+ -6.1155235309558655
701
+ ],
702
+ [
703
+ "VI",
704
+ -6.115661963794853
705
+ ],
706
+ [
707
+ "DI",
708
+ -6.120589231585161
709
+ ],
710
+ [
711
+ "RP",
712
+ -6.121380333766949
713
+ ],
714
+ [
715
+ "SF",
716
+ -6.133861651537437
717
+ ],
718
+ [
719
+ "PE",
720
+ -6.138418425045256
721
+ ],
722
+ [
723
+ "FS",
724
+ -6.139131547464277
725
+ ],
726
+ [
727
+ "RI",
728
+ -6.1391477529387615
729
+ ],
730
+ [
731
+ "LN",
732
+ -6.1404965267449985
733
+ ],
734
+ [
735
+ "NS",
736
+ -6.144657817213831
737
+ ],
738
+ [
739
+ "KV",
740
+ -6.152332752044504
741
+ ],
742
+ [
743
+ "RD",
744
+ -6.154617866692302
745
+ ],
746
+ [
747
+ "KR",
748
+ -6.155330258447307
749
+ ],
750
+ [
751
+ "KD",
752
+ -6.1652649821258905
753
+ ],
754
+ [
755
+ "SK",
756
+ -6.1693163139654725
757
+ ],
758
+ [
759
+ "IP",
760
+ -6.1828230004197025
761
+ ],
762
+ [
763
+ "NI",
764
+ -6.184849882544997
765
+ ],
766
+ [
767
+ "DP",
768
+ -6.186014251325707
769
+ ],
770
+ [
771
+ "QA",
772
+ -6.191570123841911
773
+ ],
774
+ [
775
+ "PD",
776
+ -6.194909821917465
777
+ ],
778
+ [
779
+ "TD",
780
+ -6.196392015508456
781
+ ],
782
+ [
783
+ "RT",
784
+ -6.209516518529492
785
+ ],
786
+ [
787
+ "IK",
788
+ -6.214740590660165
789
+ ],
790
+ [
791
+ "PT",
792
+ -6.225990661106273
793
+ ],
794
+ [
795
+ "KN",
796
+ -6.239211240911878
797
+ ],
798
+ [
799
+ "RK",
800
+ -6.242072582821788
801
+ ],
802
+ [
803
+ "DR",
804
+ -6.254359548545143
805
+ ],
806
+ [
807
+ "NA",
808
+ -6.26545887359997
809
+ ],
810
+ [
811
+ "SN",
812
+ -6.285562575311767
813
+ ],
814
+ [
815
+ "GF",
816
+ -6.28571824602113
817
+ ],
818
+ [
819
+ "TE",
820
+ -6.309775318855182
821
+ ],
822
+ [
823
+ "IR",
824
+ -6.31828807247183
825
+ ],
826
+ [
827
+ "EN",
828
+ -6.318322128704557
829
+ ],
830
+ [
831
+ "KG",
832
+ -6.32060657047373
833
+ ],
834
+ [
835
+ "NN",
836
+ -6.323083715957198
837
+ ],
838
+ [
839
+ "KP",
840
+ -6.341333299431328
841
+ ],
842
+ [
843
+ "GP",
844
+ -6.3491864918774095
845
+ ],
846
+ [
847
+ "NP",
848
+ -6.34927814518192
849
+ ],
850
+ [
851
+ "YL",
852
+ -6.351753043279688
853
+ ],
854
+ [
855
+ "IN",
856
+ -6.3572834033221515
857
+ ],
858
+ [
859
+ "NV",
860
+ -6.360839268616893
861
+ ],
862
+ [
863
+ "DT",
864
+ -6.375617439783092
865
+ ],
866
+ [
867
+ "AK",
868
+ -6.375711508198858
869
+ ],
870
+ [
871
+ "EQ",
872
+ -6.376112537075828
873
+ ],
874
+ [
875
+ "FG",
876
+ -6.386837832927128
877
+ ],
878
+ [
879
+ "FD",
880
+ -6.394930765846015
881
+ ],
882
+ [
883
+ "ND",
884
+ -6.398211625118337
885
+ ],
886
+ [
887
+ "GN",
888
+ -6.40179470374931
889
+ ],
890
+ [
891
+ "VK",
892
+ -6.402752834893931
893
+ ],
894
+ [
895
+ "FV",
896
+ -6.409266328977472
897
+ ],
898
+ [
899
+ "TR",
900
+ -6.416946537362946
901
+ ],
902
+ [
903
+ "VF",
904
+ -6.417794842397651
905
+ ],
906
+ [
907
+ "SQ",
908
+ -6.446275702521779
909
+ ],
910
+ [
911
+ "PR",
912
+ -6.467885446120821
913
+ ],
914
+ [
915
+ "IF",
916
+ -6.468539334368343
917
+ ],
918
+ [
919
+ "X",
920
+ -7.375634428511613
921
+ ],
922
+ [
923
+ "B",
924
+ -14.220849172973876
925
+ ],
926
+ [
927
+ "Z",
928
+ -15.447814045084268
929
+ ],
930
+ [
931
+ "U",
932
+ -16.127265163683706
933
+ ],
934
+ [
935
+ "O",
936
+ -19.622373241931182
937
+ ]
938
+ ]
939
+ }
940
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "model_max_length": 1000000000000000019884624838656,
4
+ "tokenizer_class": "PreTrainedTokenizerFast"
5
+ }