tupi-verb-anotation / special_tokens_map.json
kiansheik's picture
Add special character tokens for broken words
fe9c799 verified
raw
history blame
18.9 kB
{
"additional_special_tokens": [
{
"content": "[SUBJECT:3p:DIRECT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:DIRECT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "e",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "pe[w1q]ep[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ta",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]os",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]and[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_PARTICLE:NA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[IMPERATIVE_PREFIX:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[MAIN_VERB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PLURIFORM_PREFIX:S]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "e[w15q]ym",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT_MARKER:3p:PLURIFORM_PREFIX:MONOSYLLABIC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "abo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUB_VERB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "n[w15q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:IYU]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_SUFFIX]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "i",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[IMPERATIVE_PREFIX:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "́",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "um[w10q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "xe",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "t",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "a",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]ep[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "amo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "pe[w10q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_SUFFIX:CONSONANT_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:MUTUAL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]o",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_PREFIX]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ix[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[CIRCUMSTANTIAL_SUFFIX:NULL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PERMISSIVE_PREFIX:VOWEL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]e",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "pe",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "nde",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT_MARKER:3p:DEFAULT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "opo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "or[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]a",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "g[w0q]i",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2pp:SUBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT_MARKER:3p:MONOSYLLABIC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "a[w15q]e",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:R]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "namo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2pp:OBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "mo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "bo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[CIRCUMSTANTIAL_SUFFIX:CONSONANT_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[ROOT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_PARTICLE:UME]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "r",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_SUFFIX:VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "pa",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ramo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ere",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:REFLEXIVE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w5q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "na",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:B]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:3p:MONOSYLLABIC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "oro",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2ps:SUBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PERMISSIVE_PREFIX:CONSONANT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PLURIFORM_PREFIX:R]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "o",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2ps:OBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "end[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w0q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_2:DEFAULT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "s",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SPACE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w0q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w1q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w2q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w3q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w4q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w5q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w6q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w7q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w8q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w9q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w10q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w11q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w12q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w13q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w14q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[w15q]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
],
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}