Added foreign names for more differences in nouns

Browse files

Files changed (4) hide show

config.json +1 -1
model.safetensors +1 -1
special_tokens_map.json +24 -24
tokenizer_config.json +11 -11

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "models/t5-1.2_space/",
   "architectures": [
     "T5ForConditionalGeneration"
   ],

 {
+  "_name_or_path": "models/t5-1.3_base_nouns/",
   "architectures": [
     "T5ForConditionalGeneration"
   ],

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fe61be34df708adcf77500af072bfbd4f46352cd355b889ed6b3246145db0bd
 size 242181160

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d06e6b690a50fd664766b6bd2cc3a5a5f0c7a2e067b6936578225046a36ad21
 size 242181160

special_tokens_map.json CHANGED Viewed

@@ -63,13 +63,6 @@
       "rstrip": false,
       "single_word": false
     },
-    {
-      "content": "peẽ",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
-    },
     {
       "content": "[SUBJECT:2pp]",
       "lstrip": false,
@@ -183,21 +176,21 @@
       "single_word": false
     },
     {
-      "content": "[NEGATION_SUFFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "i",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -253,14 +246,14 @@
       "single_word": false
     },
     {
-      "content": "[SUBJECT:1ppe]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "a",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -351,14 +344,14 @@
       "single_word": false
     },
     {
-      "content": "nde",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "opo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -399,6 +392,13 @@
       "rstrip": false,
       "single_word": false
     },
     {
       "content": "gûi",
       "lstrip": false,
@@ -428,21 +428,21 @@
       "single_word": false
     },
     {
-      "content": "[GERUND_SUFFIX:CLASS_1:R]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "endé",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "namo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -526,7 +526,7 @@
       "single_word": false
     },
     {
-      "content": "[SUBJECT_PREFIX:1ps]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -540,21 +540,21 @@
       "single_word": false
     },
     {
-      "content": "ixé",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "pa",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "ramo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -610,7 +610,7 @@
       "single_word": false
     },
     {
-      "content": "[OBJECT:2ps:SUBJECT_1P]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -624,7 +624,7 @@
       "single_word": false
     },
     {
-      "content": "oro",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -729,7 +729,7 @@
       "single_word": false
     },
     {
-      "content": "s",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,

       "rstrip": false,
       "single_word": false
     },
     {
       "content": "[SUBJECT:2pp]",
       "lstrip": false,
       "single_word": false
     },
     {
+      "content": "i",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[NEGATION_SUFFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "a",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[SUBJECT:1ppe]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "opo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "nde",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "rstrip": false,
       "single_word": false
     },
+    {
+      "content": "s",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
     {
       "content": "gûi",
       "lstrip": false,
       "single_word": false
     },
     {
+      "content": "namo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[GERUND_SUFFIX:CLASS_1:R]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "endé",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "ixé",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[SUBJECT_PREFIX:1ps]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "ramo",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "pa",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "oro",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "[OBJECT:2ps:SUBJECT_1P]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
+      "content": "peẽ",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -1692,7 +1692,6 @@
     "[OBJECT:2pp]",
     "ta",
     "n'",
-    "peẽ",
     "[SUBJECT:2pp]",
     "îe",
     "[GERUND_SUBJECT_PREFIX:3p]",
@@ -1709,9 +1708,9 @@
     "[GERUND_SUFFIX:CLASS_1]",
     "[GERUND_SUFFIX:CLASS_1:IYU]",
     "îo",
-    "[NEGATION_SUFFIX]",
     "i",
     "[GERUND_SUBJECT_PREFIX:1ppe]",
     "[SUBJECT:3p]",
     "[IMPERATIVE_PREFIX:2pp]",
     "́",
@@ -1719,8 +1718,8 @@
     "a'e",
     "t",
     "[SUBJECT:1ps]",
-    "[SUBJECT:1ppe]",
     "a",
     "[SUBJECT_PREFIX:1ppe]",
     "amo",
     "[OBJECT:2ps]",
@@ -1733,20 +1732,21 @@
     "[SUBJECT:1ppi]",
     "[PERMISSIVE_PREFIX:VOWEL]",
     "pe",
-    "nde",
     "opo",
     "[OBJECT_MARKER:3p:DEFAULT]",
     "[GERUND_SUBJECT_PREFIX:2ps]",
     "[OBJECT:3p]",
     "îandé",
     "[OBJECT:2pp:SUBJECT_1P]",
     "gûi",
     "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
     "[OBJECT_MARKER:3p:MONOSYLLABIC]",
     "peîepé",
     "[GERUND_SUFFIX:CLASS_1:R]",
     "endé",
-    "namo",
     "[SUBJECT:2pp:OBJECT_1P]",
     "mo",
     "bo",
@@ -1758,11 +1758,11 @@
     "[SUBJECT_PREFIX:3p]",
     "r",
     "[NEGATION_SUFFIX:VOWEL_ENDING]",
-    "[SUBJECT_PREFIX:1ps]",
-    "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
     "ixé",
-    "pa",
     "ramo",
     "ere",
     "[OBJECT:REFLEXIVE]",
     "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
@@ -1770,9 +1770,9 @@
     "[GERUND_SUFFIX:CLASS_1:B]",
     "[OBJECT:3p:MONOSYLLABIC]",
     "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
-    "[OBJECT:2ps:SUBJECT_1P]",
-    "[GERUND_SUBJECT_PREFIX:1ps]",
     "oro",
     "[PERMISSIVE_PREFIX:CONSONANT]",
     "í",
     "oré",
@@ -1787,7 +1787,7 @@
     "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
     "[SUBJECT_PREFIX:1ppi]",
     "[SUBJECT_PREFIX:2ps]",
-    "s",
     "[SPACE]"
   ],
   "clean_up_tokenization_spaces": true,

     "[OBJECT:2pp]",
     "ta",
     "n'",
     "[SUBJECT:2pp]",
     "îe",
     "[GERUND_SUBJECT_PREFIX:3p]",
     "[GERUND_SUFFIX:CLASS_1]",
     "[GERUND_SUFFIX:CLASS_1:IYU]",
     "îo",
     "i",
     "[GERUND_SUBJECT_PREFIX:1ppe]",
+    "[NEGATION_SUFFIX]",
     "[SUBJECT:3p]",
     "[IMPERATIVE_PREFIX:2pp]",
     "́",
     "a'e",
     "t",
     "[SUBJECT:1ps]",
     "a",
+    "[SUBJECT:1ppe]",
     "[SUBJECT_PREFIX:1ppe]",
     "amo",
     "[OBJECT:2ps]",
     "[SUBJECT:1ppi]",
     "[PERMISSIVE_PREFIX:VOWEL]",
     "pe",
     "opo",
+    "nde",
     "[OBJECT_MARKER:3p:DEFAULT]",
     "[GERUND_SUBJECT_PREFIX:2ps]",
     "[OBJECT:3p]",
     "îandé",
     "[OBJECT:2pp:SUBJECT_1P]",
+    "s",
     "gûi",
     "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
     "[OBJECT_MARKER:3p:MONOSYLLABIC]",
     "peîepé",
+    "namo",
     "[GERUND_SUFFIX:CLASS_1:R]",
     "endé",
     "[SUBJECT:2pp:OBJECT_1P]",
     "mo",
     "bo",
     "[SUBJECT_PREFIX:3p]",
     "r",
     "[NEGATION_SUFFIX:VOWEL_ENDING]",
     "ixé",
+    "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
+    "[SUBJECT_PREFIX:1ps]",
     "ramo",
+    "pa",
     "ere",
     "[OBJECT:REFLEXIVE]",
     "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
     "[GERUND_SUFFIX:CLASS_1:B]",
     "[OBJECT:3p:MONOSYLLABIC]",
     "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
     "oro",
+    "[GERUND_SUBJECT_PREFIX:1ps]",
+    "[OBJECT:2ps:SUBJECT_1P]",
     "[PERMISSIVE_PREFIX:CONSONANT]",
     "í",
     "oré",
     "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
     "[SUBJECT_PREFIX:1ppi]",
     "[SUBJECT_PREFIX:2ps]",
+    "peẽ",
     "[SPACE]"
   ],
   "clean_up_tokenization_spaces": true,