kiansheik
/

tupi-verb-anotation

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c8b330e8b3e8fbff4a45942c27fe14746e74b2cdbc5b4027ddca51fcbbf9c2b
 size 242185256

 version https://git-lfs.github.com/spec/v1
+oid sha256:d80dfecd26b9f6c7e6fecc204679f434def1ff66f0c81494fdbb784d1234b6a9
 size 242185256

special_tokens_map.json CHANGED Viewed

@@ -197,7 +197,7 @@
       "single_word": false
     },
     {
-      "content": "i",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -211,7 +211,7 @@
       "single_word": false
     },
     {
-      "content": "[NEGATION_SUFFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -267,14 +267,14 @@
       "single_word": false
     },
     {
-      "content": "a",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "[SUBJECT:1ps]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -442,7 +442,7 @@
       "single_word": false
     },
     {
-      "content": "namo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -456,7 +456,7 @@
       "single_word": false
     },
     {
-      "content": "[GERUND_SUFFIX:CLASS_1:R]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -547,14 +547,14 @@
       "single_word": false
     },
     {
-      "content": "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "ixé",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -631,28 +631,28 @@
       "single_word": false
     },
     {
-      "content": "[GERUND_SUBJECT_PREFIX:1ps]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "[OBJECT:2ps:SUBJECT_1P]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "í",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "[PERMISSIVE_PREFIX:CONSONANT]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,

       "single_word": false
     },
     {
+      "content": "[NEGATION_SUFFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "i",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[SUBJECT:1ps]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "a",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[GERUND_SUFFIX:CLASS_1:R]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "namo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "ixé",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[OBJECT:2ps:SUBJECT_1P]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[GERUND_SUBJECT_PREFIX:1ps]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[PERMISSIVE_PREFIX:CONSONANT]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "í",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -1727,9 +1727,9 @@
     "[SUB_VERB]",
     "[GERUND_SUFFIX:CLASS_1:IYU]",
     "îo",
-    "i",
-    "[GERUND_SUBJECT_PREFIX:1ppe]",
     "[NEGATION_SUFFIX]",
     "[SUBJECT:3p]",
     "[IMPERATIVE_PREFIX:2pp]",
     "́",
@@ -1737,8 +1737,8 @@
     "a'e",
     "t",
     "[SUBJECT:1ppe]",
-    "a",
     "[SUBJECT:1ps]",
     "[SUBJECT_PREFIX:1ppe]",
     "amo",
     "[OBJECT:2ps]",
@@ -1762,9 +1762,9 @@
     "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
     "[OBJECT_MARKER:3p:MONOSYLLABIC]",
     "peîepé",
-    "namo",
-    "endé",
     "[GERUND_SUFFIX:CLASS_1:R]",
     "[SUBJECT:2pp:OBJECT_1P]",
     "mo",
     "bo",
@@ -1777,8 +1777,8 @@
     "r",
     "[NEGATION_SUFFIX:VOWEL_ENDING]",
     "[SUBJECT_PREFIX:1ps]",
-    "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
     "ixé",
     "pa",
     "ramo",
     "ere",
@@ -1789,10 +1789,10 @@
     "[OBJECT:3p:MONOSYLLABIC]",
     "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
     "oro",
-    "[GERUND_SUBJECT_PREFIX:1ps]",
     "[OBJECT:2ps:SUBJECT_1P]",
-    "í",
     "[PERMISSIVE_PREFIX:CONSONANT]",
     "oré",
     "umẽ",
     "[PLURIFORM_PREFIX:R]",

     "[SUB_VERB]",
     "[GERUND_SUFFIX:CLASS_1:IYU]",
     "îo",
     "[NEGATION_SUFFIX]",
+    "[GERUND_SUBJECT_PREFIX:1ppe]",
+    "i",
     "[SUBJECT:3p]",
     "[IMPERATIVE_PREFIX:2pp]",
     "́",
     "a'e",
     "t",
     "[SUBJECT:1ppe]",
     "[SUBJECT:1ps]",
+    "a",
     "[SUBJECT_PREFIX:1ppe]",
     "amo",
     "[OBJECT:2ps]",
     "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
     "[OBJECT_MARKER:3p:MONOSYLLABIC]",
     "peîepé",
     "[GERUND_SUFFIX:CLASS_1:R]",
+    "endé",
+    "namo",
     "[SUBJECT:2pp:OBJECT_1P]",
     "mo",
     "bo",
     "r",
     "[NEGATION_SUFFIX:VOWEL_ENDING]",
     "[SUBJECT_PREFIX:1ps]",
     "ixé",
+    "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
     "pa",
     "ramo",
     "ere",
     "[OBJECT:3p:MONOSYLLABIC]",
     "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
     "oro",
     "[OBJECT:2ps:SUBJECT_1P]",
+    "[GERUND_SUBJECT_PREFIX:1ps]",
     "[PERMISSIVE_PREFIX:CONSONANT]",
+    "í",
     "oré",
     "umẽ",
     "[PLURIFORM_PREFIX:R]",