lenML committed on
Commit
11ca1e7
1 Parent(s): 159d151

Upload 6 files

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +94 -1777
tokenizer_config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
- "content": "<pad>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
@@ -11,7 +12,7 @@
11
  "special": true
12
  },
13
  "1": {
14
- "content": "<eos>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
@@ -19,7 +20,7 @@
19
  "special": true
20
  },
21
  "2": {
22
- "content": "<bos>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
@@ -27,7 +28,7 @@
27
  "special": true
28
  },
29
  "3": {
30
- "content": "<unk>",
31
  "lstrip": false,
32
  "normalized": false,
33
  "rstrip": false,
@@ -35,1959 +36,263 @@
35
  "special": true
36
  },
37
  "4": {
38
- "content": "<mask>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
  "single_word": false,
43
- "special": false
44
  },
45
  "5": {
46
- "content": "<2mass>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
- "special": false
52
  },
53
  "6": {
54
- "content": "[@BOS@]",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
58
  "single_word": false,
59
- "special": false
60
  },
61
  "7": {
62
- "content": "<unused0>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": false
68
- },
69
- "8": {
70
- "content": "<unused1>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": false
76
- },
77
- "9": {
78
- "content": "<unused2>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": false
84
- },
85
- "10": {
86
- "content": "<unused3>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": false
92
- },
93
- "11": {
94
- "content": "<unused4>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": false
100
- },
101
- "12": {
102
- "content": "<unused5>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": false
108
- },
109
- "13": {
110
- "content": "<unused6>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": false
116
- },
117
- "14": {
118
- "content": "<unused7>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "15": {
126
- "content": "<unused8>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "16": {
134
- "content": "<unused9>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "17": {
142
- "content": "<unused10>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "18": {
150
- "content": "<unused11>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "19": {
158
- "content": "<unused12>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "20": {
166
- "content": "<unused13>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "21": {
174
- "content": "<unused14>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "22": {
182
- "content": "<unused15>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "23": {
190
- "content": "<unused16>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "24": {
198
- "content": "<unused17>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": false
204
- },
205
- "25": {
206
- "content": "<unused18>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": false
212
- },
213
- "26": {
214
- "content": "<unused19>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": false
220
- },
221
- "27": {
222
- "content": "<unused20>",
223
- "lstrip": false,
224
- "normalized": false,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": false
228
- },
229
- "28": {
230
- "content": "<unused21>",
231
- "lstrip": false,
232
- "normalized": false,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": false
236
- },
237
- "29": {
238
- "content": "<unused22>",
239
- "lstrip": false,
240
- "normalized": false,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": false
244
- },
245
- "30": {
246
- "content": "<unused23>",
247
- "lstrip": false,
248
- "normalized": false,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": false
252
- },
253
- "31": {
254
- "content": "<unused24>",
255
- "lstrip": false,
256
- "normalized": false,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": false
260
- },
261
- "32": {
262
- "content": "<unused25>",
263
- "lstrip": false,
264
- "normalized": false,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": false
268
- },
269
- "33": {
270
- "content": "<unused26>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": false
276
- },
277
- "34": {
278
- "content": "<unused27>",
279
- "lstrip": false,
280
- "normalized": false,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": false
284
- },
285
- "35": {
286
- "content": "<unused28>",
287
- "lstrip": false,
288
- "normalized": false,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": false
292
- },
293
- "36": {
294
- "content": "<unused29>",
295
- "lstrip": false,
296
- "normalized": false,
297
- "rstrip": false,
298
- "single_word": false,
299
- "special": false
300
- },
301
- "37": {
302
- "content": "<unused30>",
303
- "lstrip": false,
304
- "normalized": false,
305
- "rstrip": false,
306
- "single_word": false,
307
- "special": false
308
- },
309
- "38": {
310
- "content": "<unused31>",
311
  "lstrip": false,
312
  "normalized": false,
313
  "rstrip": false,
314
  "single_word": false,
315
- "special": false
316
- },
317
- "39": {
318
- "content": "<unused32>",
319
- "lstrip": false,
320
- "normalized": false,
321
- "rstrip": false,
322
- "single_word": false,
323
- "special": false
324
- },
325
- "40": {
326
- "content": "<unused33>",
327
- "lstrip": false,
328
- "normalized": false,
329
- "rstrip": false,
330
- "single_word": false,
331
- "special": false
332
- },
333
- "41": {
334
- "content": "<unused34>",
335
- "lstrip": false,
336
- "normalized": false,
337
- "rstrip": false,
338
- "single_word": false,
339
- "special": false
340
- },
341
- "42": {
342
- "content": "<unused35>",
343
- "lstrip": false,
344
- "normalized": false,
345
- "rstrip": false,
346
- "single_word": false,
347
- "special": false
348
- },
349
- "43": {
350
- "content": "<unused36>",
351
- "lstrip": false,
352
- "normalized": false,
353
- "rstrip": false,
354
- "single_word": false,
355
- "special": false
356
- },
357
- "44": {
358
- "content": "<unused37>",
359
- "lstrip": false,
360
- "normalized": false,
361
- "rstrip": false,
362
- "single_word": false,
363
- "special": false
364
- },
365
- "45": {
366
- "content": "<unused38>",
367
- "lstrip": false,
368
- "normalized": false,
369
- "rstrip": false,
370
- "single_word": false,
371
- "special": false
372
- },
373
- "46": {
374
- "content": "<unused39>",
375
- "lstrip": false,
376
- "normalized": false,
377
- "rstrip": false,
378
- "single_word": false,
379
- "special": false
380
- },
381
- "47": {
382
- "content": "<unused40>",
383
- "lstrip": false,
384
- "normalized": false,
385
- "rstrip": false,
386
- "single_word": false,
387
- "special": false
388
- },
389
- "48": {
390
- "content": "<unused41>",
391
- "lstrip": false,
392
- "normalized": false,
393
- "rstrip": false,
394
- "single_word": false,
395
- "special": false
396
  },
397
- "49": {
398
- "content": "<unused42>",
399
  "lstrip": false,
400
  "normalized": false,
401
  "rstrip": false,
402
  "single_word": false,
403
  "special": false
404
  },
405
- "50": {
406
- "content": "<unused43>",
407
  "lstrip": false,
408
  "normalized": false,
409
  "rstrip": false,
410
  "single_word": false,
411
- "special": false
412
  },
413
- "51": {
414
- "content": "<unused44>",
415
  "lstrip": false,
416
  "normalized": false,
417
  "rstrip": false,
418
  "single_word": false,
419
  "special": false
420
  },
421
- "52": {
422
- "content": "<unused45>",
423
  "lstrip": false,
424
  "normalized": false,
425
  "rstrip": false,
426
  "single_word": false,
427
  "special": false
428
  },
429
- "53": {
430
- "content": "<unused46>",
431
  "lstrip": false,
432
  "normalized": false,
433
  "rstrip": false,
434
  "single_word": false,
435
  "special": false
436
  },
437
- "54": {
438
- "content": "<unused47>",
439
  "lstrip": false,
440
  "normalized": false,
441
  "rstrip": false,
442
  "single_word": false,
443
  "special": false
444
  },
445
- "55": {
446
- "content": "<unused48>",
447
  "lstrip": false,
448
  "normalized": false,
449
  "rstrip": false,
450
  "single_word": false,
451
  "special": false
452
  },
453
- "56": {
454
- "content": "<unused49>",
455
  "lstrip": false,
456
  "normalized": false,
457
  "rstrip": false,
458
  "single_word": false,
459
  "special": false
460
  },
461
- "57": {
462
- "content": "<unused50>",
463
  "lstrip": false,
464
  "normalized": false,
465
  "rstrip": false,
466
  "single_word": false,
467
  "special": false
468
  },
469
- "58": {
470
- "content": "<unused51>",
471
  "lstrip": false,
472
  "normalized": false,
473
  "rstrip": false,
474
  "single_word": false,
475
  "special": false
476
  },
477
- "59": {
478
- "content": "<unused52>",
479
  "lstrip": false,
480
  "normalized": false,
481
  "rstrip": false,
482
  "single_word": false,
483
  "special": false
484
  },
485
- "60": {
486
- "content": "<unused53>",
487
  "lstrip": false,
488
  "normalized": false,
489
  "rstrip": false,
490
  "single_word": false,
491
  "special": false
492
  },
493
- "61": {
494
- "content": "<unused54>",
495
  "lstrip": false,
496
  "normalized": false,
497
  "rstrip": false,
498
  "single_word": false,
499
  "special": false
500
  },
501
- "62": {
502
- "content": "<unused55>",
503
  "lstrip": false,
504
  "normalized": false,
505
  "rstrip": false,
506
  "single_word": false,
507
  "special": false
508
  },
509
- "63": {
510
- "content": "<unused56>",
511
  "lstrip": false,
512
  "normalized": false,
513
  "rstrip": false,
514
  "single_word": false,
515
  "special": false
516
  },
517
- "64": {
518
- "content": "<unused57>",
519
  "lstrip": false,
520
  "normalized": false,
521
  "rstrip": false,
522
  "single_word": false,
523
  "special": false
524
  },
525
- "65": {
526
- "content": "<unused58>",
527
  "lstrip": false,
528
  "normalized": false,
529
  "rstrip": false,
530
  "single_word": false,
531
  "special": false
532
  },
533
- "66": {
534
- "content": "<unused59>",
535
  "lstrip": false,
536
  "normalized": false,
537
  "rstrip": false,
538
  "single_word": false,
539
  "special": false
540
  },
541
- "67": {
542
- "content": "<unused60>",
543
  "lstrip": false,
544
  "normalized": false,
545
  "rstrip": false,
546
  "single_word": false,
547
  "special": false
548
  },
549
- "68": {
550
- "content": "<unused61>",
551
  "lstrip": false,
552
  "normalized": false,
553
  "rstrip": false,
554
  "single_word": false,
555
  "special": false
556
  },
557
- "69": {
558
- "content": "<unused62>",
559
  "lstrip": false,
560
  "normalized": false,
561
  "rstrip": false,
562
  "single_word": false,
563
  "special": false
564
  },
565
- "70": {
566
- "content": "<unused63>",
567
  "lstrip": false,
568
  "normalized": false,
569
  "rstrip": false,
570
  "single_word": false,
571
  "special": false
572
  },
573
- "71": {
574
- "content": "<unused64>",
575
  "lstrip": false,
576
  "normalized": false,
577
  "rstrip": false,
578
  "single_word": false,
579
  "special": false
580
  },
581
- "72": {
582
- "content": "<unused65>",
583
  "lstrip": false,
584
  "normalized": false,
585
  "rstrip": false,
586
  "single_word": false,
587
  "special": false
588
  },
589
- "73": {
590
- "content": "<unused66>",
591
  "lstrip": false,
592
  "normalized": false,
593
  "rstrip": false,
594
  "single_word": false,
595
  "special": false
596
  },
597
- "74": {
598
- "content": "<unused67>",
599
  "lstrip": false,
600
  "normalized": false,
601
  "rstrip": false,
602
  "single_word": false,
603
  "special": false
604
  },
605
- "75": {
606
- "content": "<unused68>",
607
  "lstrip": false,
608
  "normalized": false,
609
  "rstrip": false,
610
  "single_word": false,
611
  "special": false
612
  },
613
- "76": {
614
- "content": "<unused69>",
615
  "lstrip": false,
616
  "normalized": false,
617
  "rstrip": false,
618
  "single_word": false,
619
  "special": false
620
  },
621
- "77": {
622
- "content": "<unused70>",
623
- "lstrip": false,
624
- "normalized": false,
625
- "rstrip": false,
626
- "single_word": false,
627
- "special": false
628
- },
629
- "78": {
630
- "content": "<unused71>",
631
- "lstrip": false,
632
- "normalized": false,
633
- "rstrip": false,
634
- "single_word": false,
635
- "special": false
636
- },
637
- "79": {
638
- "content": "<unused72>",
639
- "lstrip": false,
640
- "normalized": false,
641
- "rstrip": false,
642
- "single_word": false,
643
- "special": false
644
- },
645
- "80": {
646
- "content": "<unused73>",
647
- "lstrip": false,
648
- "normalized": false,
649
- "rstrip": false,
650
- "single_word": false,
651
- "special": false
652
- },
653
- "81": {
654
- "content": "<unused74>",
655
- "lstrip": false,
656
- "normalized": false,
657
- "rstrip": false,
658
- "single_word": false,
659
- "special": false
660
- },
661
- "82": {
662
- "content": "<unused75>",
663
- "lstrip": false,
664
- "normalized": false,
665
- "rstrip": false,
666
- "single_word": false,
667
- "special": false
668
- },
669
- "83": {
670
- "content": "<unused76>",
671
- "lstrip": false,
672
- "normalized": false,
673
- "rstrip": false,
674
- "single_word": false,
675
- "special": false
676
- },
677
- "84": {
678
- "content": "<unused77>",
679
- "lstrip": false,
680
- "normalized": false,
681
- "rstrip": false,
682
- "single_word": false,
683
- "special": false
684
- },
685
- "85": {
686
- "content": "<unused78>",
687
- "lstrip": false,
688
- "normalized": false,
689
- "rstrip": false,
690
- "single_word": false,
691
- "special": false
692
- },
693
- "86": {
694
- "content": "<unused79>",
695
- "lstrip": false,
696
- "normalized": false,
697
- "rstrip": false,
698
- "single_word": false,
699
- "special": false
700
- },
701
- "87": {
702
- "content": "<unused80>",
703
- "lstrip": false,
704
- "normalized": false,
705
- "rstrip": false,
706
- "single_word": false,
707
- "special": false
708
- },
709
- "88": {
710
- "content": "<unused81>",
711
- "lstrip": false,
712
- "normalized": false,
713
- "rstrip": false,
714
- "single_word": false,
715
- "special": false
716
- },
717
- "89": {
718
- "content": "<unused82>",
719
- "lstrip": false,
720
- "normalized": false,
721
- "rstrip": false,
722
- "single_word": false,
723
- "special": false
724
- },
725
- "90": {
726
- "content": "<unused83>",
727
- "lstrip": false,
728
- "normalized": false,
729
- "rstrip": false,
730
- "single_word": false,
731
- "special": false
732
- },
733
- "91": {
734
- "content": "<unused84>",
735
- "lstrip": false,
736
- "normalized": false,
737
- "rstrip": false,
738
- "single_word": false,
739
- "special": false
740
- },
741
- "92": {
742
- "content": "<unused85>",
743
- "lstrip": false,
744
- "normalized": false,
745
- "rstrip": false,
746
- "single_word": false,
747
- "special": false
748
- },
749
- "93": {
750
- "content": "<unused86>",
751
- "lstrip": false,
752
- "normalized": false,
753
- "rstrip": false,
754
- "single_word": false,
755
- "special": false
756
- },
757
- "94": {
758
- "content": "<unused87>",
759
- "lstrip": false,
760
- "normalized": false,
761
- "rstrip": false,
762
- "single_word": false,
763
- "special": false
764
- },
765
- "95": {
766
- "content": "<unused88>",
767
- "lstrip": false,
768
- "normalized": false,
769
- "rstrip": false,
770
- "single_word": false,
771
- "special": false
772
- },
773
- "96": {
774
- "content": "<unused89>",
775
- "lstrip": false,
776
- "normalized": false,
777
- "rstrip": false,
778
- "single_word": false,
779
- "special": false
780
- },
781
- "97": {
782
- "content": "<unused90>",
783
- "lstrip": false,
784
- "normalized": false,
785
- "rstrip": false,
786
- "single_word": false,
787
- "special": false
788
- },
789
- "98": {
790
- "content": "<unused91>",
791
- "lstrip": false,
792
- "normalized": false,
793
- "rstrip": false,
794
- "single_word": false,
795
- "special": false
796
- },
797
- "99": {
798
- "content": "<unused92>",
799
- "lstrip": false,
800
- "normalized": false,
801
- "rstrip": false,
802
- "single_word": false,
803
- "special": false
804
- },
805
- "100": {
806
- "content": "<unused93>",
807
- "lstrip": false,
808
- "normalized": false,
809
- "rstrip": false,
810
- "single_word": false,
811
- "special": false
812
- },
813
- "101": {
814
- "content": "<unused94>",
815
- "lstrip": false,
816
- "normalized": false,
817
- "rstrip": false,
818
- "single_word": false,
819
- "special": false
820
- },
821
- "102": {
822
- "content": "<unused95>",
823
- "lstrip": false,
824
- "normalized": false,
825
- "rstrip": false,
826
- "single_word": false,
827
- "special": false
828
- },
829
- "103": {
830
- "content": "<unused96>",
831
- "lstrip": false,
832
- "normalized": false,
833
- "rstrip": false,
834
- "single_word": false,
835
- "special": false
836
- },
837
- "104": {
838
- "content": "<unused97>",
839
- "lstrip": false,
840
- "normalized": false,
841
- "rstrip": false,
842
- "single_word": false,
843
- "special": false
844
- },
845
- "105": {
846
- "content": "<unused98>",
847
- "lstrip": false,
848
- "normalized": false,
849
- "rstrip": false,
850
- "single_word": false,
851
- "special": false
852
- },
853
- "106": {
854
- "content": "<start_of_turn>",
855
- "lstrip": false,
856
- "normalized": false,
857
- "rstrip": false,
858
- "single_word": false,
859
- "special": true
860
- },
861
- "107": {
862
- "content": "<end_of_turn>",
863
- "lstrip": false,
864
- "normalized": false,
865
- "rstrip": false,
866
- "single_word": false,
867
- "special": true
868
- },
869
- "108": {
870
- "content": "\n",
871
- "lstrip": false,
872
- "normalized": false,
873
- "rstrip": false,
874
- "single_word": false,
875
- "special": false
876
- },
877
- "109": {
878
- "content": "\n\n",
879
- "lstrip": false,
880
- "normalized": false,
881
- "rstrip": false,
882
- "single_word": false,
883
- "special": false
884
- },
885
- "110": {
886
- "content": "\n\n\n",
887
- "lstrip": false,
888
- "normalized": false,
889
- "rstrip": false,
890
- "single_word": false,
891
- "special": false
892
- },
893
- "111": {
894
- "content": "\n\n\n\n",
895
- "lstrip": false,
896
- "normalized": false,
897
- "rstrip": false,
898
- "single_word": false,
899
- "special": false
900
- },
901
- "112": {
902
- "content": "\n\n\n\n\n",
903
- "lstrip": false,
904
- "normalized": false,
905
- "rstrip": false,
906
- "single_word": false,
907
- "special": false
908
- },
909
- "113": {
910
- "content": "\n\n\n\n\n\n",
911
- "lstrip": false,
912
- "normalized": false,
913
- "rstrip": false,
914
- "single_word": false,
915
- "special": false
916
- },
917
- "114": {
918
- "content": "\n\n\n\n\n\n\n",
919
- "lstrip": false,
920
- "normalized": false,
921
- "rstrip": false,
922
- "single_word": false,
923
- "special": false
924
- },
925
- "115": {
926
- "content": "\n\n\n\n\n\n\n\n",
927
- "lstrip": false,
928
- "normalized": false,
929
- "rstrip": false,
930
- "single_word": false,
931
- "special": false
932
- },
933
- "116": {
934
- "content": "\n\n\n\n\n\n\n\n\n",
935
- "lstrip": false,
936
- "normalized": false,
937
- "rstrip": false,
938
- "single_word": false,
939
- "special": false
940
- },
941
- "117": {
942
- "content": "\n\n\n\n\n\n\n\n\n\n",
943
- "lstrip": false,
944
- "normalized": false,
945
- "rstrip": false,
946
- "single_word": false,
947
- "special": false
948
- },
949
- "118": {
950
- "content": "\n\n\n\n\n\n\n\n\n\n\n",
951
- "lstrip": false,
952
- "normalized": false,
953
- "rstrip": false,
954
- "single_word": false,
955
- "special": false
956
- },
957
- "119": {
958
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
959
- "lstrip": false,
960
- "normalized": false,
961
- "rstrip": false,
962
- "single_word": false,
963
- "special": false
964
- },
965
- "120": {
966
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
967
- "lstrip": false,
968
- "normalized": false,
969
- "rstrip": false,
970
- "single_word": false,
971
- "special": false
972
- },
973
- "121": {
974
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
975
- "lstrip": false,
976
- "normalized": false,
977
- "rstrip": false,
978
- "single_word": false,
979
- "special": false
980
- },
981
- "122": {
982
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
983
- "lstrip": false,
984
- "normalized": false,
985
- "rstrip": false,
986
- "single_word": false,
987
- "special": false
988
- },
989
- "123": {
990
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
991
- "lstrip": false,
992
- "normalized": false,
993
- "rstrip": false,
994
- "single_word": false,
995
- "special": false
996
- },
997
- "124": {
998
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
999
- "lstrip": false,
1000
- "normalized": false,
1001
- "rstrip": false,
1002
- "single_word": false,
1003
- "special": false
1004
- },
1005
- "125": {
1006
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1007
- "lstrip": false,
1008
- "normalized": false,
1009
- "rstrip": false,
1010
- "single_word": false,
1011
- "special": false
1012
- },
1013
- "126": {
1014
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1015
- "lstrip": false,
1016
- "normalized": false,
1017
- "rstrip": false,
1018
- "single_word": false,
1019
- "special": false
1020
- },
1021
- "127": {
1022
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1023
- "lstrip": false,
1024
- "normalized": false,
1025
- "rstrip": false,
1026
- "single_word": false,
1027
- "special": false
1028
- },
1029
- "128": {
1030
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1031
- "lstrip": false,
1032
- "normalized": false,
1033
- "rstrip": false,
1034
- "single_word": false,
1035
- "special": false
1036
- },
1037
- "129": {
1038
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1039
- "lstrip": false,
1040
- "normalized": false,
1041
- "rstrip": false,
1042
- "single_word": false,
1043
- "special": false
1044
- },
1045
- "130": {
1046
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1047
- "lstrip": false,
1048
- "normalized": false,
1049
- "rstrip": false,
1050
- "single_word": false,
1051
- "special": false
1052
- },
1053
- "131": {
1054
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1055
- "lstrip": false,
1056
- "normalized": false,
1057
- "rstrip": false,
1058
- "single_word": false,
1059
- "special": false
1060
- },
1061
- "132": {
1062
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1063
- "lstrip": false,
1064
- "normalized": false,
1065
- "rstrip": false,
1066
- "single_word": false,
1067
- "special": false
1068
- },
1069
- "133": {
1070
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1071
- "lstrip": false,
1072
- "normalized": false,
1073
- "rstrip": false,
1074
- "single_word": false,
1075
- "special": false
1076
- },
1077
- "134": {
1078
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1079
- "lstrip": false,
1080
- "normalized": false,
1081
- "rstrip": false,
1082
- "single_word": false,
1083
- "special": false
1084
- },
1085
- "135": {
1086
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1087
- "lstrip": false,
1088
- "normalized": false,
1089
- "rstrip": false,
1090
- "single_word": false,
1091
- "special": false
1092
- },
1093
- "136": {
1094
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1095
- "lstrip": false,
1096
- "normalized": false,
1097
- "rstrip": false,
1098
- "single_word": false,
1099
- "special": false
1100
- },
1101
- "137": {
1102
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1103
- "lstrip": false,
1104
- "normalized": false,
1105
- "rstrip": false,
1106
- "single_word": false,
1107
- "special": false
1108
- },
1109
- "138": {
1110
- "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1111
- "lstrip": false,
1112
- "normalized": false,
1113
- "rstrip": false,
1114
- "single_word": false,
1115
- "special": false
1116
- },
1117
- "139": {
1118
- "content": "▁▁",
1119
- "lstrip": false,
1120
- "normalized": false,
1121
- "rstrip": false,
1122
- "single_word": false,
1123
- "special": false
1124
- },
1125
- "140": {
1126
- "content": "▁▁▁",
1127
- "lstrip": false,
1128
- "normalized": false,
1129
- "rstrip": false,
1130
- "single_word": false,
1131
- "special": false
1132
- },
1133
- "141": {
1134
- "content": "▁▁▁▁",
1135
- "lstrip": false,
1136
- "normalized": false,
1137
- "rstrip": false,
1138
- "single_word": false,
1139
- "special": false
1140
- },
1141
- "142": {
1142
- "content": "▁▁▁▁▁",
1143
- "lstrip": false,
1144
- "normalized": false,
1145
- "rstrip": false,
1146
- "single_word": false,
1147
- "special": false
1148
- },
1149
- "143": {
1150
- "content": "▁▁▁▁▁▁",
1151
- "lstrip": false,
1152
- "normalized": false,
1153
- "rstrip": false,
1154
- "single_word": false,
1155
- "special": false
1156
- },
1157
- "144": {
1158
- "content": "▁▁▁▁▁▁▁",
1159
- "lstrip": false,
1160
- "normalized": false,
1161
- "rstrip": false,
1162
- "single_word": false,
1163
- "special": false
1164
- },
1165
- "145": {
1166
- "content": "▁▁▁▁▁▁▁▁",
1167
- "lstrip": false,
1168
- "normalized": false,
1169
- "rstrip": false,
1170
- "single_word": false,
1171
- "special": false
1172
- },
1173
- "146": {
1174
- "content": "▁▁▁▁▁▁▁▁▁",
1175
- "lstrip": false,
1176
- "normalized": false,
1177
- "rstrip": false,
1178
- "single_word": false,
1179
- "special": false
1180
- },
1181
- "147": {
1182
- "content": "▁▁▁▁▁▁▁▁▁▁",
1183
- "lstrip": false,
1184
- "normalized": false,
1185
- "rstrip": false,
1186
- "single_word": false,
1187
- "special": false
1188
- },
1189
- "148": {
1190
- "content": "▁▁▁▁▁▁▁▁▁▁▁",
1191
- "lstrip": false,
1192
- "normalized": false,
1193
- "rstrip": false,
1194
- "single_word": false,
1195
- "special": false
1196
- },
1197
- "149": {
1198
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
1199
- "lstrip": false,
1200
- "normalized": false,
1201
- "rstrip": false,
1202
- "single_word": false,
1203
- "special": false
1204
- },
1205
- "150": {
1206
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
1207
- "lstrip": false,
1208
- "normalized": false,
1209
- "rstrip": false,
1210
- "single_word": false,
1211
- "special": false
1212
- },
1213
- "151": {
1214
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1215
- "lstrip": false,
1216
- "normalized": false,
1217
- "rstrip": false,
1218
- "single_word": false,
1219
- "special": false
1220
- },
1221
- "152": {
1222
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1223
- "lstrip": false,
1224
- "normalized": false,
1225
- "rstrip": false,
1226
- "single_word": false,
1227
- "special": false
1228
- },
1229
- "153": {
1230
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1231
- "lstrip": false,
1232
- "normalized": false,
1233
- "rstrip": false,
1234
- "single_word": false,
1235
- "special": false
1236
- },
1237
- "154": {
1238
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1239
- "lstrip": false,
1240
- "normalized": false,
1241
- "rstrip": false,
1242
- "single_word": false,
1243
- "special": false
1244
- },
1245
- "155": {
1246
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1247
- "lstrip": false,
1248
- "normalized": false,
1249
- "rstrip": false,
1250
- "single_word": false,
1251
- "special": false
1252
- },
1253
- "156": {
1254
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1255
- "lstrip": false,
1256
- "normalized": false,
1257
- "rstrip": false,
1258
- "single_word": false,
1259
- "special": false
1260
- },
1261
- "157": {
1262
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1263
- "lstrip": false,
1264
- "normalized": false,
1265
- "rstrip": false,
1266
- "single_word": false,
1267
- "special": false
1268
- },
1269
- "158": {
1270
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1271
- "lstrip": false,
1272
- "normalized": false,
1273
- "rstrip": false,
1274
- "single_word": false,
1275
- "special": false
1276
- },
1277
- "159": {
1278
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1279
- "lstrip": false,
1280
- "normalized": false,
1281
- "rstrip": false,
1282
- "single_word": false,
1283
- "special": false
1284
- },
1285
- "160": {
1286
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1287
- "lstrip": false,
1288
- "normalized": false,
1289
- "rstrip": false,
1290
- "single_word": false,
1291
- "special": false
1292
- },
1293
- "161": {
1294
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1295
- "lstrip": false,
1296
- "normalized": false,
1297
- "rstrip": false,
1298
- "single_word": false,
1299
- "special": false
1300
- },
1301
- "162": {
1302
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1303
- "lstrip": false,
1304
- "normalized": false,
1305
- "rstrip": false,
1306
- "single_word": false,
1307
- "special": false
1308
- },
1309
- "163": {
1310
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1311
- "lstrip": false,
1312
- "normalized": false,
1313
- "rstrip": false,
1314
- "single_word": false,
1315
- "special": false
1316
- },
1317
- "164": {
1318
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1319
- "lstrip": false,
1320
- "normalized": false,
1321
- "rstrip": false,
1322
- "single_word": false,
1323
- "special": false
1324
- },
1325
- "165": {
1326
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1327
- "lstrip": false,
1328
- "normalized": false,
1329
- "rstrip": false,
1330
- "single_word": false,
1331
- "special": false
1332
- },
1333
- "166": {
1334
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1335
- "lstrip": false,
1336
- "normalized": false,
1337
- "rstrip": false,
1338
- "single_word": false,
1339
- "special": false
1340
- },
1341
- "167": {
1342
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1343
- "lstrip": false,
1344
- "normalized": false,
1345
- "rstrip": false,
1346
- "single_word": false,
1347
- "special": false
1348
- },
1349
- "168": {
1350
- "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1351
- "lstrip": false,
1352
- "normalized": false,
1353
- "rstrip": false,
1354
- "single_word": false,
1355
- "special": false
1356
- },
1357
- "169": {
1358
- "content": "<table>",
1359
- "lstrip": false,
1360
- "normalized": false,
1361
- "rstrip": false,
1362
- "single_word": false,
1363
- "special": false
1364
- },
1365
- "170": {
1366
- "content": "<caption>",
1367
- "lstrip": false,
1368
- "normalized": false,
1369
- "rstrip": false,
1370
- "single_word": false,
1371
- "special": false
1372
- },
1373
- "171": {
1374
- "content": "<thead>",
1375
- "lstrip": false,
1376
- "normalized": false,
1377
- "rstrip": false,
1378
- "single_word": false,
1379
- "special": false
1380
- },
1381
- "172": {
1382
- "content": "<tbody>",
1383
- "lstrip": false,
1384
- "normalized": false,
1385
- "rstrip": false,
1386
- "single_word": false,
1387
- "special": false
1388
- },
1389
- "173": {
1390
- "content": "<tfoot>",
1391
- "lstrip": false,
1392
- "normalized": false,
1393
- "rstrip": false,
1394
- "single_word": false,
1395
- "special": false
1396
- },
1397
- "174": {
1398
- "content": "<tr>",
1399
- "lstrip": false,
1400
- "normalized": false,
1401
- "rstrip": false,
1402
- "single_word": false,
1403
- "special": false
1404
- },
1405
- "175": {
1406
- "content": "<th>",
1407
- "lstrip": false,
1408
- "normalized": false,
1409
- "rstrip": false,
1410
- "single_word": false,
1411
- "special": false
1412
- },
1413
- "176": {
1414
- "content": "<td>",
1415
- "lstrip": false,
1416
- "normalized": false,
1417
- "rstrip": false,
1418
- "single_word": false,
1419
- "special": false
1420
- },
1421
- "177": {
1422
- "content": "</table>",
1423
- "lstrip": false,
1424
- "normalized": false,
1425
- "rstrip": false,
1426
- "single_word": false,
1427
- "special": false
1428
- },
1429
- "178": {
1430
- "content": "</caption>",
1431
- "lstrip": false,
1432
- "normalized": false,
1433
- "rstrip": false,
1434
- "single_word": false,
1435
- "special": false
1436
- },
1437
- "179": {
1438
- "content": "</thead>",
1439
- "lstrip": false,
1440
- "normalized": false,
1441
- "rstrip": false,
1442
- "single_word": false,
1443
- "special": false
1444
- },
1445
- "180": {
1446
- "content": "</tbody>",
1447
- "lstrip": false,
1448
- "normalized": false,
1449
- "rstrip": false,
1450
- "single_word": false,
1451
- "special": false
1452
- },
1453
- "181": {
1454
- "content": "</tfoot>",
1455
- "lstrip": false,
1456
- "normalized": false,
1457
- "rstrip": false,
1458
- "single_word": false,
1459
- "special": false
1460
- },
1461
- "182": {
1462
- "content": "</tr>",
1463
- "lstrip": false,
1464
- "normalized": false,
1465
- "rstrip": false,
1466
- "single_word": false,
1467
- "special": false
1468
- },
1469
- "183": {
1470
- "content": "</th>",
1471
- "lstrip": false,
1472
- "normalized": false,
1473
- "rstrip": false,
1474
- "single_word": false,
1475
- "special": false
1476
- },
1477
- "184": {
1478
- "content": "</td>",
1479
- "lstrip": false,
1480
- "normalized": false,
1481
- "rstrip": false,
1482
- "single_word": false,
1483
- "special": false
1484
- },
1485
- "185": {
1486
- "content": "<h1>",
1487
- "lstrip": false,
1488
- "normalized": false,
1489
- "rstrip": false,
1490
- "single_word": false,
1491
- "special": false
1492
- },
1493
- "186": {
1494
- "content": "<h2>",
1495
- "lstrip": false,
1496
- "normalized": false,
1497
- "rstrip": false,
1498
- "single_word": false,
1499
- "special": false
1500
- },
1501
- "187": {
1502
- "content": "<h3>",
1503
- "lstrip": false,
1504
- "normalized": false,
1505
- "rstrip": false,
1506
- "single_word": false,
1507
- "special": false
1508
- },
1509
- "188": {
1510
- "content": "<h4>",
1511
- "lstrip": false,
1512
- "normalized": false,
1513
- "rstrip": false,
1514
- "single_word": false,
1515
- "special": false
1516
- },
1517
- "189": {
1518
- "content": "<h5>",
1519
- "lstrip": false,
1520
- "normalized": false,
1521
- "rstrip": false,
1522
- "single_word": false,
1523
- "special": false
1524
- },
1525
- "190": {
1526
- "content": "<h6>",
1527
- "lstrip": false,
1528
- "normalized": false,
1529
- "rstrip": false,
1530
- "single_word": false,
1531
- "special": false
1532
- },
1533
- "191": {
1534
- "content": "<blockquote>",
1535
- "lstrip": false,
1536
- "normalized": false,
1537
- "rstrip": false,
1538
- "single_word": false,
1539
- "special": false
1540
- },
1541
- "192": {
1542
- "content": "</h1>",
1543
- "lstrip": false,
1544
- "normalized": false,
1545
- "rstrip": false,
1546
- "single_word": false,
1547
- "special": false
1548
- },
1549
- "193": {
1550
- "content": "</h2>",
1551
- "lstrip": false,
1552
- "normalized": false,
1553
- "rstrip": false,
1554
- "single_word": false,
1555
- "special": false
1556
- },
1557
- "194": {
1558
- "content": "</h3>",
1559
- "lstrip": false,
1560
- "normalized": false,
1561
- "rstrip": false,
1562
- "single_word": false,
1563
- "special": false
1564
- },
1565
- "195": {
1566
- "content": "</h4>",
1567
- "lstrip": false,
1568
- "normalized": false,
1569
- "rstrip": false,
1570
- "single_word": false,
1571
- "special": false
1572
- },
1573
- "196": {
1574
- "content": "</h5>",
1575
- "lstrip": false,
1576
- "normalized": false,
1577
- "rstrip": false,
1578
- "single_word": false,
1579
- "special": false
1580
- },
1581
- "197": {
1582
- "content": "</h6>",
1583
- "lstrip": false,
1584
- "normalized": false,
1585
- "rstrip": false,
1586
- "single_word": false,
1587
- "special": false
1588
- },
1589
- "198": {
1590
- "content": "</blockquote>",
1591
- "lstrip": false,
1592
- "normalized": false,
1593
- "rstrip": false,
1594
- "single_word": false,
1595
- "special": false
1596
- },
1597
- "199": {
1598
- "content": "<strong>",
1599
- "lstrip": false,
1600
- "normalized": false,
1601
- "rstrip": false,
1602
- "single_word": false,
1603
- "special": false
1604
- },
1605
- "200": {
1606
- "content": "<em>",
1607
- "lstrip": false,
1608
- "normalized": false,
1609
- "rstrip": false,
1610
- "single_word": false,
1611
- "special": false
1612
- },
1613
- "201": {
1614
- "content": "<b>",
1615
- "lstrip": false,
1616
- "normalized": false,
1617
- "rstrip": false,
1618
- "single_word": false,
1619
- "special": false
1620
- },
1621
- "202": {
1622
- "content": "<i>",
1623
- "lstrip": false,
1624
- "normalized": false,
1625
- "rstrip": false,
1626
- "single_word": false,
1627
- "special": false
1628
- },
1629
- "203": {
1630
- "content": "<u>",
1631
- "lstrip": false,
1632
- "normalized": false,
1633
- "rstrip": false,
1634
- "single_word": false,
1635
- "special": false
1636
- },
1637
- "204": {
1638
- "content": "<s>",
1639
- "lstrip": false,
1640
- "normalized": false,
1641
- "rstrip": false,
1642
- "single_word": false,
1643
- "special": false
1644
- },
1645
- "205": {
1646
- "content": "<sub>",
1647
- "lstrip": false,
1648
- "normalized": false,
1649
- "rstrip": false,
1650
- "single_word": false,
1651
- "special": false
1652
- },
1653
- "206": {
1654
- "content": "<sup>",
1655
- "lstrip": false,
1656
- "normalized": false,
1657
- "rstrip": false,
1658
- "single_word": false,
1659
- "special": false
1660
- },
1661
- "207": {
1662
- "content": "<code>",
1663
- "lstrip": false,
1664
- "normalized": false,
1665
- "rstrip": false,
1666
- "single_word": false,
1667
- "special": false
1668
- },
1669
- "208": {
1670
- "content": "</strong>",
1671
- "lstrip": false,
1672
- "normalized": false,
1673
- "rstrip": false,
1674
- "single_word": false,
1675
- "special": false
1676
- },
1677
- "209": {
1678
- "content": "</em>",
1679
- "lstrip": false,
1680
- "normalized": false,
1681
- "rstrip": false,
1682
- "single_word": false,
1683
- "special": false
1684
- },
1685
- "210": {
1686
- "content": "</b>",
1687
- "lstrip": false,
1688
- "normalized": false,
1689
- "rstrip": false,
1690
- "single_word": false,
1691
- "special": false
1692
- },
1693
- "211": {
1694
- "content": "</i>",
1695
- "lstrip": false,
1696
- "normalized": false,
1697
- "rstrip": false,
1698
- "single_word": false,
1699
- "special": false
1700
- },
1701
- "212": {
1702
- "content": "</u>",
1703
- "lstrip": false,
1704
- "normalized": false,
1705
- "rstrip": false,
1706
- "single_word": false,
1707
- "special": false
1708
- },
1709
- "213": {
1710
- "content": "</s>",
1711
- "lstrip": false,
1712
- "normalized": false,
1713
- "rstrip": false,
1714
- "single_word": false,
1715
- "special": false
1716
- },
1717
- "214": {
1718
- "content": "</sub>",
1719
- "lstrip": false,
1720
- "normalized": false,
1721
- "rstrip": false,
1722
- "single_word": false,
1723
- "special": false
1724
- },
1725
- "215": {
1726
- "content": "</sup>",
1727
- "lstrip": false,
1728
- "normalized": false,
1729
- "rstrip": false,
1730
- "single_word": false,
1731
- "special": false
1732
- },
1733
- "216": {
1734
- "content": "</code>",
1735
- "lstrip": false,
1736
- "normalized": false,
1737
- "rstrip": false,
1738
- "single_word": false,
1739
- "special": false
1740
- },
1741
- "255968": {
1742
- "content": "[toxicity=0]",
1743
- "lstrip": false,
1744
- "normalized": false,
1745
- "rstrip": false,
1746
- "single_word": false,
1747
- "special": false
1748
- },
1749
- "255969": {
1750
- "content": "\t\t",
1751
- "lstrip": false,
1752
- "normalized": false,
1753
- "rstrip": false,
1754
- "single_word": false,
1755
- "special": false
1756
- },
1757
- "255970": {
1758
- "content": "\t\t\t",
1759
- "lstrip": false,
1760
- "normalized": false,
1761
- "rstrip": false,
1762
- "single_word": false,
1763
- "special": false
1764
- },
1765
- "255971": {
1766
- "content": "\t\t\t\t",
1767
- "lstrip": false,
1768
- "normalized": false,
1769
- "rstrip": false,
1770
- "single_word": false,
1771
- "special": false
1772
- },
1773
- "255972": {
1774
- "content": "\t\t\t\t\t",
1775
- "lstrip": false,
1776
- "normalized": false,
1777
- "rstrip": false,
1778
- "single_word": false,
1779
- "special": false
1780
- },
1781
- "255973": {
1782
- "content": "\t\t\t\t\t\t",
1783
- "lstrip": false,
1784
- "normalized": false,
1785
- "rstrip": false,
1786
- "single_word": false,
1787
- "special": false
1788
- },
1789
- "255974": {
1790
- "content": "\t\t\t\t\t\t\t",
1791
- "lstrip": false,
1792
- "normalized": false,
1793
- "rstrip": false,
1794
- "single_word": false,
1795
- "special": false
1796
- },
1797
- "255975": {
1798
- "content": "\t\t\t\t\t\t\t\t",
1799
- "lstrip": false,
1800
- "normalized": false,
1801
- "rstrip": false,
1802
- "single_word": false,
1803
- "special": false
1804
- },
1805
- "255976": {
1806
- "content": "\t\t\t\t\t\t\t\t\t",
1807
- "lstrip": false,
1808
- "normalized": false,
1809
- "rstrip": false,
1810
- "single_word": false,
1811
- "special": false
1812
- },
1813
- "255977": {
1814
- "content": "\t\t\t\t\t\t\t\t\t\t",
1815
- "lstrip": false,
1816
- "normalized": false,
1817
- "rstrip": false,
1818
- "single_word": false,
1819
- "special": false
1820
- },
1821
- "255978": {
1822
- "content": "\t\t\t\t\t\t\t\t\t\t\t",
1823
- "lstrip": false,
1824
- "normalized": false,
1825
- "rstrip": false,
1826
- "single_word": false,
1827
- "special": false
1828
- },
1829
- "255979": {
1830
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t",
1831
- "lstrip": false,
1832
- "normalized": false,
1833
- "rstrip": false,
1834
- "single_word": false,
1835
- "special": false
1836
- },
1837
- "255980": {
1838
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t",
1839
- "lstrip": false,
1840
- "normalized": false,
1841
- "rstrip": false,
1842
- "single_word": false,
1843
- "special": false
1844
- },
1845
- "255981": {
1846
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1847
- "lstrip": false,
1848
- "normalized": false,
1849
- "rstrip": false,
1850
- "single_word": false,
1851
- "special": false
1852
- },
1853
- "255982": {
1854
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1855
- "lstrip": false,
1856
- "normalized": false,
1857
- "rstrip": false,
1858
- "single_word": false,
1859
- "special": false
1860
- },
1861
- "255983": {
1862
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1863
- "lstrip": false,
1864
- "normalized": false,
1865
- "rstrip": false,
1866
- "single_word": false,
1867
- "special": false
1868
- },
1869
- "255984": {
1870
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1871
- "lstrip": false,
1872
- "normalized": false,
1873
- "rstrip": false,
1874
- "single_word": false,
1875
- "special": false
1876
- },
1877
- "255985": {
1878
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1879
- "lstrip": false,
1880
- "normalized": false,
1881
- "rstrip": false,
1882
- "single_word": false,
1883
- "special": false
1884
- },
1885
- "255986": {
1886
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1887
- "lstrip": false,
1888
- "normalized": false,
1889
- "rstrip": false,
1890
- "single_word": false,
1891
- "special": false
1892
- },
1893
- "255987": {
1894
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1895
- "lstrip": false,
1896
- "normalized": false,
1897
- "rstrip": false,
1898
- "single_word": false,
1899
- "special": false
1900
- },
1901
- "255988": {
1902
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1903
- "lstrip": false,
1904
- "normalized": false,
1905
- "rstrip": false,
1906
- "single_word": false,
1907
- "special": false
1908
- },
1909
- "255989": {
1910
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1911
- "lstrip": false,
1912
- "normalized": false,
1913
- "rstrip": false,
1914
- "single_word": false,
1915
- "special": false
1916
- },
1917
- "255990": {
1918
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1919
- "lstrip": false,
1920
- "normalized": false,
1921
- "rstrip": false,
1922
- "single_word": false,
1923
- "special": false
1924
- },
1925
- "255991": {
1926
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1927
- "lstrip": false,
1928
- "normalized": false,
1929
- "rstrip": false,
1930
- "single_word": false,
1931
- "special": false
1932
- },
1933
- "255992": {
1934
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1935
- "lstrip": false,
1936
- "normalized": false,
1937
- "rstrip": false,
1938
- "single_word": false,
1939
- "special": false
1940
- },
1941
- "255993": {
1942
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1943
- "lstrip": false,
1944
- "normalized": false,
1945
- "rstrip": false,
1946
- "single_word": false,
1947
- "special": false
1948
- },
1949
- "255994": {
1950
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1951
- "lstrip": false,
1952
- "normalized": false,
1953
- "rstrip": false,
1954
- "single_word": false,
1955
- "special": false
1956
- },
1957
- "255995": {
1958
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1959
- "lstrip": false,
1960
- "normalized": false,
1961
- "rstrip": false,
1962
- "single_word": false,
1963
- "special": false
1964
- },
1965
- "255996": {
1966
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1967
- "lstrip": false,
1968
- "normalized": false,
1969
- "rstrip": false,
1970
- "single_word": false,
1971
- "special": false
1972
- },
1973
- "255997": {
1974
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1975
- "lstrip": false,
1976
- "normalized": false,
1977
- "rstrip": false,
1978
- "single_word": false,
1979
- "special": false
1980
- },
1981
- "255998": {
1982
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1983
- "lstrip": false,
1984
- "normalized": false,
1985
- "rstrip": false,
1986
- "single_word": false,
1987
- "special": false
1988
- },
1989
- "255999": {
1990
- "content": "<unused99>",
1991
  "lstrip": false,
1992
  "normalized": false,
1993
  "rstrip": false,
@@ -1995,19 +300,31 @@
1995
  "special": false
1996
  }
1997
  },
1998
- "additional_special_tokens": [
1999
- "<start_of_turn>",
2000
- "<end_of_turn>"
 
 
 
 
 
 
 
 
 
 
 
2001
  ],
2002
- "bos_token": "<bos>",
2003
- "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
- "eos_token": "<eos>",
 
 
2006
  "model_max_length": 1000000000000000019884624838656,
2007
- "pad_token": "<pad>",
2008
  "sp_model_kwargs": {},
2009
  "spaces_between_special_tokens": false,
2010
- "tokenizer_class": "GemmaTokenizer",
2011
- "unk_token": "<unk>",
2012
- "use_default_system_prompt": false
 
2013
  }
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": false,
5
  "added_tokens_decoder": {
6
  "0": {
7
+ "content": "<PAD>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
 
12
  "special": true
13
  },
14
  "1": {
15
+ "content": "<UNK>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
 
20
  "special": true
21
  },
22
  "2": {
23
+ "content": "<CLS>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
 
28
  "special": true
29
  },
30
  "3": {
31
+ "content": "<SEP>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
 
36
  "special": true
37
  },
38
  "4": {
39
+ "content": "<MASK_TOKEN>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false,
44
+ "special": true
45
  },
46
  "5": {
47
+ "content": "<BOS_TOKEN>",
48
  "lstrip": false,
49
  "normalized": false,
50
  "rstrip": false,
51
  "single_word": false,
52
+ "special": true
53
  },
54
  "6": {
55
+ "content": "<EOS_TOKEN>",
56
  "lstrip": false,
57
  "normalized": false,
58
  "rstrip": false,
59
  "single_word": false,
60
+ "special": true
61
  },
62
  "7": {
63
+ "content": "<EOP_TOKEN>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  "lstrip": false,
65
  "normalized": false,
66
  "rstrip": false,
67
  "single_word": false,
68
+ "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  },
70
+ "255000": {
71
+ "content": "<|START_OF_TURN_TOKEN|>",
72
  "lstrip": false,
73
  "normalized": false,
74
  "rstrip": false,
75
  "single_word": false,
76
  "special": false
77
  },
78
+ "255001": {
79
+ "content": "<|END_OF_TURN_TOKEN|>",
80
  "lstrip": false,
81
  "normalized": false,
82
  "rstrip": false,
83
  "single_word": false,
84
+ "special": true
85
  },
86
+ "255002": {
87
+ "content": "<|YES_TOKEN|>",
88
  "lstrip": false,
89
  "normalized": false,
90
  "rstrip": false,
91
  "single_word": false,
92
  "special": false
93
  },
94
+ "255003": {
95
+ "content": "<|NO_TOKEN|>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
99
  "single_word": false,
100
  "special": false
101
  },
102
+ "255004": {
103
+ "content": "<|GOOD_TOKEN|>",
104
  "lstrip": false,
105
  "normalized": false,
106
  "rstrip": false,
107
  "single_word": false,
108
  "special": false
109
  },
110
+ "255005": {
111
+ "content": "<|BAD_TOKEN|>",
112
  "lstrip": false,
113
  "normalized": false,
114
  "rstrip": false,
115
  "single_word": false,
116
  "special": false
117
  },
118
+ "255006": {
119
+ "content": "<|USER_TOKEN|>",
120
  "lstrip": false,
121
  "normalized": false,
122
  "rstrip": false,
123
  "single_word": false,
124
  "special": false
125
  },
126
+ "255007": {
127
+ "content": "<|CHATBOT_TOKEN|>",
128
  "lstrip": false,
129
  "normalized": false,
130
  "rstrip": false,
131
  "single_word": false,
132
  "special": false
133
  },
134
+ "255008": {
135
+ "content": "<|SYSTEM_TOKEN|>",
136
  "lstrip": false,
137
  "normalized": false,
138
  "rstrip": false,
139
  "single_word": false,
140
  "special": false
141
  },
142
+ "255009": {
143
+ "content": "<|USER_0_TOKEN|>",
144
  "lstrip": false,
145
  "normalized": false,
146
  "rstrip": false,
147
  "single_word": false,
148
  "special": false
149
  },
150
+ "255010": {
151
+ "content": "<|USER_1_TOKEN|>",
152
  "lstrip": false,
153
  "normalized": false,
154
  "rstrip": false,
155
  "single_word": false,
156
  "special": false
157
  },
158
+ "255011": {
159
+ "content": "<|USER_2_TOKEN|>",
160
  "lstrip": false,
161
  "normalized": false,
162
  "rstrip": false,
163
  "single_word": false,
164
  "special": false
165
  },
166
+ "255012": {
167
+ "content": "<|USER_3_TOKEN|>",
168
  "lstrip": false,
169
  "normalized": false,
170
  "rstrip": false,
171
  "single_word": false,
172
  "special": false
173
  },
174
+ "255013": {
175
+ "content": "<|USER_4_TOKEN|>",
176
  "lstrip": false,
177
  "normalized": false,
178
  "rstrip": false,
179
  "single_word": false,
180
  "special": false
181
  },
182
+ "255014": {
183
+ "content": "<|USER_5_TOKEN|>",
184
  "lstrip": false,
185
  "normalized": false,
186
  "rstrip": false,
187
  "single_word": false,
188
  "special": false
189
  },
190
+ "255015": {
191
+ "content": "<|USER_6_TOKEN|>",
192
  "lstrip": false,
193
  "normalized": false,
194
  "rstrip": false,
195
  "single_word": false,
196
  "special": false
197
  },
198
+ "255016": {
199
+ "content": "<|USER_7_TOKEN|>",
200
  "lstrip": false,
201
  "normalized": false,
202
  "rstrip": false,
203
  "single_word": false,
204
  "special": false
205
  },
206
+ "255017": {
207
+ "content": "<|USER_8_TOKEN|>",
208
  "lstrip": false,
209
  "normalized": false,
210
  "rstrip": false,
211
  "single_word": false,
212
  "special": false
213
  },
214
+ "255018": {
215
+ "content": "<|USER_9_TOKEN|>",
216
  "lstrip": false,
217
  "normalized": false,
218
  "rstrip": false,
219
  "single_word": false,
220
  "special": false
221
  },
222
+ "255019": {
223
+ "content": "<|EXTRA_0_TOKEN|>",
224
  "lstrip": false,
225
  "normalized": false,
226
  "rstrip": false,
227
  "single_word": false,
228
  "special": false
229
  },
230
+ "255020": {
231
+ "content": "<|EXTRA_1_TOKEN|>",
232
  "lstrip": false,
233
  "normalized": false,
234
  "rstrip": false,
235
  "single_word": false,
236
  "special": false
237
  },
238
+ "255021": {
239
+ "content": "<|EXTRA_2_TOKEN|>",
240
  "lstrip": false,
241
  "normalized": false,
242
  "rstrip": false,
243
  "single_word": false,
244
  "special": false
245
  },
246
+ "255022": {
247
+ "content": "<|EXTRA_3_TOKEN|>",
248
  "lstrip": false,
249
  "normalized": false,
250
  "rstrip": false,
251
  "single_word": false,
252
  "special": false
253
  },
254
+ "255023": {
255
+ "content": "<|EXTRA_4_TOKEN|>",
256
  "lstrip": false,
257
  "normalized": false,
258
  "rstrip": false,
259
  "single_word": false,
260
  "special": false
261
  },
262
+ "255024": {
263
+ "content": "<|EXTRA_5_TOKEN|>",
264
  "lstrip": false,
265
  "normalized": false,
266
  "rstrip": false,
267
  "single_word": false,
268
  "special": false
269
  },
270
+ "255025": {
271
+ "content": "<|EXTRA_6_TOKEN|>",
272
  "lstrip": false,
273
  "normalized": false,
274
  "rstrip": false,
275
  "single_word": false,
276
  "special": false
277
  },
278
+ "255026": {
279
+ "content": "<|EXTRA_7_TOKEN|>",
280
  "lstrip": false,
281
  "normalized": false,
282
  "rstrip": false,
283
  "single_word": false,
284
  "special": false
285
  },
286
+ "255027": {
287
+ "content": "<|EXTRA_8_TOKEN|>",
288
  "lstrip": false,
289
  "normalized": false,
290
  "rstrip": false,
291
  "single_word": false,
292
  "special": false
293
  },
294
+ "255028": {
295
+ "content": "<|EXTRA_9_TOKEN|>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  "lstrip": false,
297
  "normalized": false,
298
  "rstrip": false,
 
300
  "special": false
301
  }
302
  },
303
+ "bos_token": "<BOS_TOKEN>",
304
+ "chat_template": [
305
+ {
306
+ "name": "default",
307
+ "template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, multilingual AI-assistant trained to assist human users by providing thorough responses. You are able to interact and respond to questions in 23 languages and you are powered by a multilingual model built by Cohere For AI.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
308
+ },
309
+ {
310
+ "name": "tool_use",
311
+ "template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = '## Task and Context\\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\\'s needs as best you can, which will be wide-ranging.\\n\\n## Style Guide\\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.' %}{% endif %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}{{ '# Safety Preamble' }}{{ '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}{{ '\n\n# System Preamble' }}{{ '\n## Basic Rules' }}{{ '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' 
}}{{ '\n\n# User Preamble' }}{{ '\n' + system_message }}{{'\n\n## Available Tools\nHere is a list of tools that you have available to you:\n\n'}}{% for tool in tools %}{% if loop.index0 != 0 %}{{ '\n\n'}}{% endif %}{{'```python\ndef ' + tool.name + '('}}{% for param_name, param_fields in tool.parameter_definitions.items() %}{% if loop.index0 != 0 %}{{ ', '}}{% endif %}{{param_name}}: {% if not param_fields.required %}{{'Optional[' + param_fields.type + '] = None'}}{% else %}{{ param_fields.type }}{% endif %}{% endfor %}{{ ') -> List[Dict]:\n \"\"\"'}}{{ tool.description }}{% if tool.parameter_definitions|length != 0 %}{{ '\n\n Args:\n '}}{% for param_name, param_fields in tool.parameter_definitions.items() %}{% if loop.index0 != 0 %}{{ '\n ' }}{% endif %}{{ param_name + ' ('}}{% if not param_fields.required %}{{'Optional[' + param_fields.type + ']'}}{% else %}{{ param_fields.type }}{% endif %}{{ '): ' + param_fields.description }}{% endfor %}{% endif %}{{ '\n \"\"\"\n pass\n```' }}{% endfor %}{{ '<|END_OF_TURN_TOKEN|>'}}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'system' %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{{'<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \\'Action:\\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n \"tool_name\": title of the tool in the specification,\n \"parameters\": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|>'}}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
312
+ },
313
+ {
314
+ "name": "rag",
315
+ "template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = '## Task and Context\\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\\'s needs as best you can, which will be wide-ranging.\\n\\n## Style Guide\\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.' %}{% endif %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}{{ '# Safety Preamble' }}{{ '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}{{ '\n\n# System Preamble' }}{{ '\n## Basic Rules' }}{{ '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' 
}}{{ '\n\n# User Preamble' }}{{ '\n' + system_message }}{{ '<|END_OF_TURN_TOKEN|>'}}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'system' %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>'}}{{ '<results>' }}{% for document in documents %}{{ '\nDocument: ' }}{{ loop.index0 }}\n{% for key, value in document.items() %}{{ key }}: {{value}}\n{% endfor %}{% endfor %}{{ '</results>'}}{{ '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}{{ 'Carefully perform the following instructions, in order, starting each with a new line.\n' }}{{ 'Firstly, Decide which of the retrieved documents are relevant to the user\\'s last input by writing \\'Relevant Documents:\\' followed by comma-separated list of document numbers. If none are relevant, you should instead write \\'None\\'.\n' }}{{ 'Secondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user\\'s last input by writing \\'Cited Documents:\\' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write \\'None\\'.\n' }}{% if citation_mode=='accurate' %}{{ 'Thirdly, Write \\'Answer:\\' followed by a response to the user\\'s last input in high quality natural english. Use the retrieved documents to help you. Do not insert any citations or grounding markup.\n' }}{% endif %}{{ 'Finally, Write \\'Grounded answer:\\' followed by a response to the user\\'s last input in high quality natural english. 
Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.' }}{{ '<|END_OF_TURN_TOKEN|>' }}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
316
+ }
317
  ],
 
 
318
  "clean_up_tokenization_spaces": false,
319
+ "eos_token": "<|END_OF_TURN_TOKEN|>",
320
+ "legacy": true,
321
+ "merges_file": null,
322
  "model_max_length": 1000000000000000019884624838656,
323
+ "pad_token": "<PAD>",
324
  "sp_model_kwargs": {},
325
  "spaces_between_special_tokens": false,
326
+ "tokenizer_class": "CohereTokenizer",
327
+ "unk_token": null,
328
+ "use_default_system_prompt": false,
329
+ "vocab_file": null
330
  }