tupi-verb-anotation / added_tokens.json
kiansheik's picture
Add special character tokens for broken words
fe9c799 verified
raw
history blame
6.12 kB
{
"<extra_id_0>": 32099,
"<extra_id_10>": 32089,
"<extra_id_11>": 32088,
"<extra_id_12>": 32087,
"<extra_id_13>": 32086,
"<extra_id_14>": 32085,
"<extra_id_15>": 32084,
"<extra_id_16>": 32083,
"<extra_id_17>": 32082,
"<extra_id_18>": 32081,
"<extra_id_19>": 32080,
"<extra_id_1>": 32098,
"<extra_id_20>": 32079,
"<extra_id_21>": 32078,
"<extra_id_22>": 32077,
"<extra_id_23>": 32076,
"<extra_id_24>": 32075,
"<extra_id_25>": 32074,
"<extra_id_26>": 32073,
"<extra_id_27>": 32072,
"<extra_id_28>": 32071,
"<extra_id_29>": 32070,
"<extra_id_2>": 32097,
"<extra_id_30>": 32069,
"<extra_id_31>": 32068,
"<extra_id_32>": 32067,
"<extra_id_33>": 32066,
"<extra_id_34>": 32065,
"<extra_id_35>": 32064,
"<extra_id_36>": 32063,
"<extra_id_37>": 32062,
"<extra_id_38>": 32061,
"<extra_id_39>": 32060,
"<extra_id_3>": 32096,
"<extra_id_40>": 32059,
"<extra_id_41>": 32058,
"<extra_id_42>": 32057,
"<extra_id_43>": 32056,
"<extra_id_44>": 32055,
"<extra_id_45>": 32054,
"<extra_id_46>": 32053,
"<extra_id_47>": 32052,
"<extra_id_48>": 32051,
"<extra_id_49>": 32050,
"<extra_id_4>": 32095,
"<extra_id_50>": 32049,
"<extra_id_51>": 32048,
"<extra_id_52>": 32047,
"<extra_id_53>": 32046,
"<extra_id_54>": 32045,
"<extra_id_55>": 32044,
"<extra_id_56>": 32043,
"<extra_id_57>": 32042,
"<extra_id_58>": 32041,
"<extra_id_59>": 32040,
"<extra_id_5>": 32094,
"<extra_id_60>": 32039,
"<extra_id_61>": 32038,
"<extra_id_62>": 32037,
"<extra_id_63>": 32036,
"<extra_id_64>": 32035,
"<extra_id_65>": 32034,
"<extra_id_66>": 32033,
"<extra_id_67>": 32032,
"<extra_id_68>": 32031,
"<extra_id_69>": 32030,
"<extra_id_6>": 32093,
"<extra_id_70>": 32029,
"<extra_id_71>": 32028,
"<extra_id_72>": 32027,
"<extra_id_73>": 32026,
"<extra_id_74>": 32025,
"<extra_id_75>": 32024,
"<extra_id_76>": 32023,
"<extra_id_77>": 32022,
"<extra_id_78>": 32021,
"<extra_id_79>": 32020,
"<extra_id_7>": 32092,
"<extra_id_80>": 32019,
"<extra_id_81>": 32018,
"<extra_id_82>": 32017,
"<extra_id_83>": 32016,
"<extra_id_84>": 32015,
"<extra_id_85>": 32014,
"<extra_id_86>": 32013,
"<extra_id_87>": 32012,
"<extra_id_88>": 32011,
"<extra_id_89>": 32010,
"<extra_id_8>": 32091,
"<extra_id_90>": 32009,
"<extra_id_91>": 32008,
"<extra_id_92>": 32007,
"<extra_id_93>": 32006,
"<extra_id_94>": 32005,
"<extra_id_95>": 32004,
"<extra_id_96>": 32003,
"<extra_id_97>": 32002,
"<extra_id_98>": 32001,
"<extra_id_99>": 32000,
"<extra_id_9>": 32090,
"[CIRCUMSTANTIAL_SUFFIX:CONSONANT_ENDING]": 32133,
"[CIRCUMSTANTIAL_SUFFIX:NULL_ENDING]": 32180,
"[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]": 32160,
"[GERUND_SUBJECT_PREFIX:1ppe]": 32168,
"[GERUND_SUBJECT_PREFIX:1ppi]": 32150,
"[GERUND_SUBJECT_PREFIX:1ps]": 32184,
"[GERUND_SUBJECT_PREFIX:2pp]": 32122,
"[GERUND_SUBJECT_PREFIX:2ps]": 32123,
"[GERUND_SUBJECT_PREFIX:3p]": 32132,
"[GERUND_SUFFIX:CLASS_1:B]": 32177,
"[GERUND_SUFFIX:CLASS_1:CONSONANT]": 32175,
"[GERUND_SUFFIX:CLASS_1:IYU]": 32125,
"[GERUND_SUFFIX:CLASS_1:NASAL_IYU]": 32146,
"[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]": 32109,
"[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]": 32174,
"[GERUND_SUFFIX:CLASS_1:R]": 32162,
"[GERUND_SUFFIX:CLASS_1]": 32100,
"[GERUND_SUFFIX:CLASS_2:DEFAULT]": 32176,
"[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]": 32166,
"[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]": 32170,
"[IMPERATIVE_PREFIX:2pp]": 32144,
"[IMPERATIVE_PREFIX:2ps]": 32153,
"[MAIN_VERB]": 32196,
"[NEGATION_PARTICLE:NA]": 32152,
"[NEGATION_PARTICLE:UME]": 32135,
"[NEGATION_PREFIX]": 32143,
"[NEGATION_SUFFIX:CONSONANT_ENDING]": 32103,
"[NEGATION_SUFFIX:VOWEL_ENDING]": 32101,
"[NEGATION_SUFFIX]": 32156,
"[OBJECT:1ppe]": 32116,
"[OBJECT:1ppi]": 32190,
"[OBJECT:1ps]": 32124,
"[OBJECT:2pp:SUBJECT_1P]": 32145,
"[OBJECT:2pp]": 32102,
"[OBJECT:2ps:SUBJECT_1P]": 32186,
"[OBJECT:2ps]": 32179,
"[OBJECT:3p:MONOSYLLABIC]": 32167,
"[OBJECT:3p]": 32137,
"[OBJECT:DIRECT]": 32194,
"[OBJECT:MUTUAL]": 32106,
"[OBJECT:REFLEXIVE]": 32148,
"[OBJECT_MARKER:3p:DEFAULT]": 32114,
"[OBJECT_MARKER:3p:MONOSYLLABIC]": 32108,
"[OBJECT_MARKER:3p:PLURIFORM_PREFIX:MONOSYLLABIC]": 32151,
"[PAD]": 32192,
"[PERMISSIVE_PREFIX:CONSONANT]": 32149,
"[PERMISSIVE_PREFIX:VOWEL]": 32118,
"[PLURIFORM_PREFIX:R]": 32141,
"[PLURIFORM_PREFIX:S]": 32154,
"[ROOT]": 32163,
"[SPACE]": 32195,
"[SUBJECT:1ppe]": 32158,
"[SUBJECT:1ppi]": 32120,
"[SUBJECT:1ps]": 32112,
"[SUBJECT:2pp:OBJECT_1P]": 32131,
"[SUBJECT:2pp]": 32172,
"[SUBJECT:2ps:OBJECT_1P]": 32178,
"[SUBJECT:2ps]": 32173,
"[SUBJECT:3p:DIRECT]": 32193,
"[SUBJECT:3p]": 32113,
"[SUBJECT_PREFIX:1ppe]": 32189,
"[SUBJECT_PREFIX:1ppi]": 32107,
"[SUBJECT_PREFIX:1ps]": 32159,
"[SUBJECT_PREFIX:2pp]": 32119,
"[SUBJECT_PREFIX:2ps]": 32130,
"[SUBJECT_PREFIX:3p]": 32171,
"[SUB_VERB]": 32197,
"[w0q]": 32216,
"[w10q]": 32224,
"[w11q]": 32225,
"[w12q]": 32226,
"[w13q]": 32227,
"[w14q]": 32228,
"[w15q]": 32229,
"[w1q]": 32204,
"[w1q]a": 32211,
"[w1q]and[w4q]": 32200,
"[w1q]e": 32209,
"[w1q]ep[w4q]": 32205,
"[w1q]o": 32207,
"[w1q]os": 32199,
"[w2q]": 32217,
"[w3q]": 32218,
"[w4q]": 32219,
"[w5q]": 32214,
"[w6q]": 32220,
"[w7q]": 32221,
"[w8q]": 32222,
"[w9q]": 32223,
"a'e": 32127,
"a[w15q]e": 32213,
"amo": 32104,
"bo": 32110,
"e'ym": 32139,
"e[w15q]ym": 32201,
"end[w4q]": 32215,
"endé": 32128,
"ere": 32157,
"g[w0q]i": 32212,
"gûi": 32185,
"ix[w4q]": 32208,
"ixé": 32165,
"mo": 32164,
"n'": 32126,
"n[w15q]": 32202,
"na": 32155,
"namo": 32142,
"nde": 32181,
"opo": 32183,
"or[w4q]": 32210,
"oro": 32105,
"oré": 32169,
"pa": 32136,
"pe[w10q]": 32206,
"pe[w1q]ep[w4q]": 32198,
"peîepé": 32117,
"peẽ": 32138,
"ramo": 32161,
"ta": 32129,
"um[w10q]": 32203,
"umẽ": 32191,
"xe": 32182,
"í": 32121,
"îa": 32187,
"îandé": 32115,
"îe": 32147,
"îepé": 32188,
"îo": 32140,
"îos": 32134,
"́": 32111
}