|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270731": { |
|
"content": "<mask>", |
|
"lstrip": true, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270732": { |
|
"content": "ace_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270733": { |
|
"content": "ace_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270734": { |
|
"content": "acm_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270735": { |
|
"content": "acq_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270736": { |
|
"content": "aeb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270737": { |
|
"content": "afr_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270738": { |
|
"content": "ajp_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270739": { |
|
"content": "aka_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270740": { |
|
"content": "amh_Ethi", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270741": { |
|
"content": "apc_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270742": { |
|
"content": "arb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270743": { |
|
"content": "ars_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270744": { |
|
"content": "ary_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270745": { |
|
"content": "arz_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270746": { |
|
"content": "asm_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270747": { |
|
"content": "ast_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270748": { |
|
"content": "awa_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270749": { |
|
"content": "ayr_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270750": { |
|
"content": "azb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270751": { |
|
"content": "azj_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270752": { |
|
"content": "bak_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270753": { |
|
"content": "bam_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270754": { |
|
"content": "ban_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270755": { |
|
"content": "bel_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270756": { |
|
"content": "bem_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270757": { |
|
"content": "ben_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270758": { |
|
"content": "bho_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270759": { |
|
"content": "bjn_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270760": { |
|
"content": "bjn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270761": { |
|
"content": "bod_Tibt", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270762": { |
|
"content": "bos_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270763": { |
|
"content": "bug_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270764": { |
|
"content": "bul_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270765": { |
|
"content": "cat_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270766": { |
|
"content": "ceb_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270767": { |
|
"content": "ces_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270768": { |
|
"content": "cjk_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270769": { |
|
"content": "ckb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270770": { |
|
"content": "crh_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270771": { |
|
"content": "cym_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270772": { |
|
"content": "dan_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270773": { |
|
"content": "deu_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270774": { |
|
"content": "dik_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270775": { |
|
"content": "dyu_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270776": { |
|
"content": "dzo_Tibt", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270777": { |
|
"content": "ell_Grek", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270778": { |
|
"content": "eng_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270779": { |
|
"content": "epo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270780": { |
|
"content": "est_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270781": { |
|
"content": "eus_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270782": { |
|
"content": "ewe_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270783": { |
|
"content": "fao_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270784": { |
|
"content": "pes_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270785": { |
|
"content": "fij_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270786": { |
|
"content": "fin_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270787": { |
|
"content": "fon_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270788": { |
|
"content": "fra_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270789": { |
|
"content": "fur_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270790": { |
|
"content": "fuv_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270791": { |
|
"content": "gla_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270792": { |
|
"content": "gle_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270793": { |
|
"content": "glg_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270794": { |
|
"content": "grn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270795": { |
|
"content": "guj_Gujr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270796": { |
|
"content": "hat_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270797": { |
|
"content": "hau_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270798": { |
|
"content": "heb_Hebr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270799": { |
|
"content": "hin_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270800": { |
|
"content": "hne_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270801": { |
|
"content": "hrv_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270802": { |
|
"content": "hun_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270803": { |
|
"content": "hye_Armn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270804": { |
|
"content": "ibo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270805": { |
|
"content": "ilo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270806": { |
|
"content": "ind_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270807": { |
|
"content": "isl_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270808": { |
|
"content": "ita_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270809": { |
|
"content": "jav_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270810": { |
|
"content": "jpn_Jpan", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270811": { |
|
"content": "kab_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270812": { |
|
"content": "kac_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270813": { |
|
"content": "kam_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270814": { |
|
"content": "kan_Knda", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270815": { |
|
"content": "kas_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270816": { |
|
"content": "kas_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270817": { |
|
"content": "kat_Geor", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270818": { |
|
"content": "knc_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270819": { |
|
"content": "knc_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270820": { |
|
"content": "kaz_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270821": { |
|
"content": "kbp_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270822": { |
|
"content": "kea_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270823": { |
|
"content": "khm_Khmr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270824": { |
|
"content": "kik_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270825": { |
|
"content": "kin_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270826": { |
|
"content": "kir_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270827": { |
|
"content": "kmb_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270828": { |
|
"content": "kon_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270829": { |
|
"content": "kor_Hang", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270830": { |
|
"content": "kmr_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270831": { |
|
"content": "lao_Laoo", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270832": { |
|
"content": "lvs_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270833": { |
|
"content": "lij_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270834": { |
|
"content": "lim_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270835": { |
|
"content": "lin_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270836": { |
|
"content": "lit_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270837": { |
|
"content": "lmo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270838": { |
|
"content": "ltg_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270839": { |
|
"content": "ltz_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270840": { |
|
"content": "lua_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270841": { |
|
"content": "lug_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270842": { |
|
"content": "luo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270843": { |
|
"content": "lus_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270844": { |
|
"content": "mag_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270845": { |
|
"content": "mai_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270846": { |
|
"content": "mal_Mlym", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270847": { |
|
"content": "mar_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270848": { |
|
"content": "min_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270849": { |
|
"content": "mkd_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270850": { |
|
"content": "plt_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270851": { |
|
"content": "mlt_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270852": { |
|
"content": "mni_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270853": { |
|
"content": "khk_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270854": { |
|
"content": "mos_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270855": { |
|
"content": "mri_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270856": { |
|
"content": "zsm_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270857": { |
|
"content": "mya_Mymr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270858": { |
|
"content": "nld_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270859": { |
|
"content": "nno_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270860": { |
|
"content": "nob_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270861": { |
|
"content": "npi_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270862": { |
|
"content": "nso_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270863": { |
|
"content": "nus_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270864": { |
|
"content": "nya_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270865": { |
|
"content": "oci_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270866": { |
|
"content": "gaz_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270867": { |
|
"content": "ory_Orya", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270868": { |
|
"content": "pag_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270869": { |
|
"content": "pan_Guru", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270870": { |
|
"content": "pap_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270871": { |
|
"content": "pol_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270872": { |
|
"content": "por_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270873": { |
|
"content": "prs_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270874": { |
|
"content": "pbt_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270875": { |
|
"content": "quy_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270876": { |
|
"content": "ron_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270877": { |
|
"content": "run_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270878": { |
|
"content": "rus_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270879": { |
|
"content": "sag_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270880": { |
|
"content": "san_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270881": { |
|
"content": "sat_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270882": { |
|
"content": "scn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270883": { |
|
"content": "shn_Mymr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270884": { |
|
"content": "sin_Sinh", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270885": { |
|
"content": "slk_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270886": { |
|
"content": "slv_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270887": { |
|
"content": "smo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270888": { |
|
"content": "sna_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270889": { |
|
"content": "snd_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270890": { |
|
"content": "som_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270891": { |
|
"content": "sot_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270892": { |
|
"content": "spa_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270893": { |
|
"content": "als_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270894": { |
|
"content": "srd_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270895": { |
|
"content": "srp_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270896": { |
|
"content": "ssw_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270897": { |
|
"content": "sun_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270898": { |
|
"content": "swe_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270899": { |
|
"content": "swh_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270900": { |
|
"content": "szl_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270901": { |
|
"content": "tam_Taml", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270902": { |
|
"content": "tat_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270903": { |
|
"content": "tel_Telu", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270904": { |
|
"content": "tgk_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270905": { |
|
"content": "tgl_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270906": { |
|
"content": "tha_Thai", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270907": { |
|
"content": "tir_Ethi", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270908": { |
|
"content": "taq_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270909": { |
|
"content": "taq_Tfng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270910": { |
|
"content": "tpi_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270911": { |
|
"content": "tsn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270912": { |
|
"content": "tso_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270913": { |
|
"content": "tuk_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270914": { |
|
"content": "tum_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270915": { |
|
"content": "tur_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270916": { |
|
"content": "twi_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270917": { |
|
"content": "tzm_Tfng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270918": { |
|
"content": "uig_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270919": { |
|
"content": "ukr_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270920": { |
|
"content": "umb_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270921": { |
|
"content": "urd_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270922": { |
|
"content": "uzn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270923": { |
|
"content": "vec_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270924": { |
|
"content": "vie_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270925": { |
|
"content": "war_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270926": { |
|
"content": "wol_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270927": { |
|
"content": "xho_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270928": { |
|
"content": "ydd_Hebr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270929": { |
|
"content": "yor_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270930": { |
|
"content": "yue_Hant", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270931": { |
|
"content": "zho_Hans", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270932": { |
|
"content": "zho_Hant", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270933": { |
|
"content": "zul_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"270934": { |
|
"content": "mns_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"ace_Arab", |
|
"ace_Latn", |
|
"acm_Arab", |
|
"acq_Arab", |
|
"aeb_Arab", |
|
"afr_Latn", |
|
"ajp_Arab", |
|
"aka_Latn", |
|
"amh_Ethi", |
|
"apc_Arab", |
|
"arb_Arab", |
|
"ars_Arab", |
|
"ary_Arab", |
|
"arz_Arab", |
|
"asm_Beng", |
|
"ast_Latn", |
|
"awa_Deva", |
|
"ayr_Latn", |
|
"azb_Arab", |
|
"azj_Latn", |
|
"bak_Cyrl", |
|
"bam_Latn", |
|
"ban_Latn", |
|
"bel_Cyrl", |
|
"bem_Latn", |
|
"ben_Beng", |
|
"bho_Deva", |
|
"bjn_Arab", |
|
"bjn_Latn", |
|
"bod_Tibt", |
|
"bos_Latn", |
|
"bug_Latn", |
|
"bul_Cyrl", |
|
"cat_Latn", |
|
"ceb_Latn", |
|
"ces_Latn", |
|
"cjk_Latn", |
|
"ckb_Arab", |
|
"crh_Latn", |
|
"cym_Latn", |
|
"dan_Latn", |
|
"deu_Latn", |
|
"dik_Latn", |
|
"dyu_Latn", |
|
"dzo_Tibt", |
|
"ell_Grek", |
|
"eng_Latn", |
|
"epo_Latn", |
|
"est_Latn", |
|
"eus_Latn", |
|
"ewe_Latn", |
|
"fao_Latn", |
|
"pes_Arab", |
|
"fij_Latn", |
|
"fin_Latn", |
|
"fon_Latn", |
|
"fra_Latn", |
|
"fur_Latn", |
|
"fuv_Latn", |
|
"gla_Latn", |
|
"gle_Latn", |
|
"glg_Latn", |
|
"grn_Latn", |
|
"guj_Gujr", |
|
"hat_Latn", |
|
"hau_Latn", |
|
"heb_Hebr", |
|
"hin_Deva", |
|
"hne_Deva", |
|
"hrv_Latn", |
|
"hun_Latn", |
|
"hye_Armn", |
|
"ibo_Latn", |
|
"ilo_Latn", |
|
"ind_Latn", |
|
"isl_Latn", |
|
"ita_Latn", |
|
"jav_Latn", |
|
"jpn_Jpan", |
|
"kab_Latn", |
|
"kac_Latn", |
|
"kam_Latn", |
|
"kan_Knda", |
|
"kas_Arab", |
|
"kas_Deva", |
|
"kat_Geor", |
|
"knc_Arab", |
|
"knc_Latn", |
|
"kaz_Cyrl", |
|
"kbp_Latn", |
|
"kea_Latn", |
|
"khm_Khmr", |
|
"kik_Latn", |
|
"kin_Latn", |
|
"kir_Cyrl", |
|
"kmb_Latn", |
|
"kon_Latn", |
|
"kor_Hang", |
|
"kmr_Latn", |
|
"lao_Laoo", |
|
"lvs_Latn", |
|
"lij_Latn", |
|
"lim_Latn", |
|
"lin_Latn", |
|
"lit_Latn", |
|
"lmo_Latn", |
|
"ltg_Latn", |
|
"ltz_Latn", |
|
"lua_Latn", |
|
"lug_Latn", |
|
"luo_Latn", |
|
"lus_Latn", |
|
"mag_Deva", |
|
"mai_Deva", |
|
"mal_Mlym", |
|
"mar_Deva", |
|
"min_Latn", |
|
"mkd_Cyrl", |
|
"plt_Latn", |
|
"mlt_Latn", |
|
"mni_Beng", |
|
"khk_Cyrl", |
|
"mos_Latn", |
|
"mri_Latn", |
|
"zsm_Latn", |
|
"mya_Mymr", |
|
"nld_Latn", |
|
"nno_Latn", |
|
"nob_Latn", |
|
"npi_Deva", |
|
"nso_Latn", |
|
"nus_Latn", |
|
"nya_Latn", |
|
"oci_Latn", |
|
"gaz_Latn", |
|
"ory_Orya", |
|
"pag_Latn", |
|
"pan_Guru", |
|
"pap_Latn", |
|
"pol_Latn", |
|
"por_Latn", |
|
"prs_Arab", |
|
"pbt_Arab", |
|
"quy_Latn", |
|
"ron_Latn", |
|
"run_Latn", |
|
"rus_Cyrl", |
|
"sag_Latn", |
|
"san_Deva", |
|
"sat_Beng", |
|
"scn_Latn", |
|
"shn_Mymr", |
|
"sin_Sinh", |
|
"slk_Latn", |
|
"slv_Latn", |
|
"smo_Latn", |
|
"sna_Latn", |
|
"snd_Arab", |
|
"som_Latn", |
|
"sot_Latn", |
|
"spa_Latn", |
|
"als_Latn", |
|
"srd_Latn", |
|
"srp_Cyrl", |
|
"ssw_Latn", |
|
"sun_Latn", |
|
"swe_Latn", |
|
"swh_Latn", |
|
"szl_Latn", |
|
"tam_Taml", |
|
"tat_Cyrl", |
|
"tel_Telu", |
|
"tgk_Cyrl", |
|
"tgl_Latn", |
|
"tha_Thai", |
|
"tir_Ethi", |
|
"taq_Latn", |
|
"taq_Tfng", |
|
"tpi_Latn", |
|
"tsn_Latn", |
|
"tso_Latn", |
|
"tuk_Latn", |
|
"tum_Latn", |
|
"tur_Latn", |
|
"twi_Latn", |
|
"tzm_Tfng", |
|
"uig_Arab", |
|
"ukr_Cyrl", |
|
"umb_Latn", |
|
"urd_Arab", |
|
"uzn_Latn", |
|
"vec_Latn", |
|
"vie_Latn", |
|
"war_Latn", |
|
"wol_Latn", |
|
"xho_Latn", |
|
"ydd_Hebr", |
|
"yor_Latn", |
|
"yue_Hant", |
|
"zho_Hans", |
|
"zho_Hant", |
|
"zul_Latn", |
|
"mns_Cyrl" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"legacy_behaviour": false, |
|
"mask_token": "<mask>", |
|
"model_max_length": 1024, |
|
"pad_token": "<pad>", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"src_lang": "eng_Latn", |
|
"tgt_lang": null, |
|
"tokenizer_class": "NllbTokenizer", |
|
"unk_token": "<unk>" |
|
} |
|
|