tupi-verb-anotation / special_tokens_map.json
kiansheik's picture
Upload 7 files (#1)
c34680d verified
raw
history blame
15.8 kB
{
"additional_special_tokens": [
{
"content": "e'ym",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "îepé",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[ROOT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PLURIFORM_PREFIX:S]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2ps:SUBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_PARTICLE:NA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "îe",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "́",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "amo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "îa",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "umẽ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[IMPERATIVE_PREFIX:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "o",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "xe",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PLURIFORM_PREFIX:R]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "í",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "n'",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2pp:SUBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PERMISSIVE_PREFIX:CONSONANT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:1ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "î",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT_MARKER:3p:PLURIFORM_PREFIX:MONOSYLLABIC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "pe",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_PREFIX]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "abo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "na",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:R]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_PARTICLE:UME]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "îandé",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_SUFFIX:CONSONANT_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[CIRCUMSTANTIAL_SUFFIX:CONSONANT_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "gûi",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ixé",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "namo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ere",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "mo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT_MARKER:3p:DEFAULT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:1ppi]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "peẽ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "s",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:3p]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "û",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2ps:OBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "îos",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "nde",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "endé",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:MUTUAL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "oré",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "t",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "bo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:IYU]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "r",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "i",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ramo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:3p:MONOSYLLABIC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "ta",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "a'e",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "peîepé",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "a",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUBJECT_PREFIX:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_2:DEFAULT]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:1ppe]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_SUFFIX]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "e",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[PERMISSIVE_PREFIX:VOWEL]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT:REFLEXIVE]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "pa",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT_PREFIX:2pp]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[GERUND_SUFFIX:CLASS_1:B]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[SUBJECT:2pp:OBJECT_1P]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "oro",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[NEGATION_SUFFIX:VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[CIRCUMSTANTIAL_SUFFIX:NULL_ENDING]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[IMPERATIVE_PREFIX:2ps]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "[OBJECT_MARKER:3p:MONOSYLLABIC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "opo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "îo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
],
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}