kiansheik committed on
Commit
675c6bf
1 Parent(s): 79bb7c9

Added foreign names for more differences in nouns

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "models/t5-1.2_space/",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "models/t5-1.3_base_nouns/",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fe61be34df708adcf77500af072bfbd4f46352cd355b889ed6b3246145db0bd
3
  size 242181160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d06e6b690a50fd664766b6bd2cc3a5a5f0c7a2e067b6936578225046a36ad21
3
  size 242181160
special_tokens_map.json CHANGED
@@ -63,13 +63,6 @@
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
- {
67
- "content": "peẽ",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
  {
74
  "content": "[SUBJECT:2pp]",
75
  "lstrip": false,
@@ -183,21 +176,21 @@
183
  "single_word": false
184
  },
185
  {
186
- "content": "[NEGATION_SUFFIX]",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
190
  "single_word": false
191
  },
192
  {
193
- "content": "i",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
197
  "single_word": false
198
  },
199
  {
200
- "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
@@ -253,14 +246,14 @@
253
  "single_word": false
254
  },
255
  {
256
- "content": "[SUBJECT:1ppe]",
257
  "lstrip": false,
258
  "normalized": false,
259
  "rstrip": false,
260
  "single_word": false
261
  },
262
  {
263
- "content": "a",
264
  "lstrip": false,
265
  "normalized": false,
266
  "rstrip": false,
@@ -351,14 +344,14 @@
351
  "single_word": false
352
  },
353
  {
354
- "content": "nde",
355
  "lstrip": false,
356
  "normalized": false,
357
  "rstrip": false,
358
  "single_word": false
359
  },
360
  {
361
- "content": "opo",
362
  "lstrip": false,
363
  "normalized": false,
364
  "rstrip": false,
@@ -399,6 +392,13 @@
399
  "rstrip": false,
400
  "single_word": false
401
  },
 
 
 
 
 
 
 
402
  {
403
  "content": "gûi",
404
  "lstrip": false,
@@ -428,21 +428,21 @@
428
  "single_word": false
429
  },
430
  {
431
- "content": "[GERUND_SUFFIX:CLASS_1:R]",
432
  "lstrip": false,
433
  "normalized": false,
434
  "rstrip": false,
435
  "single_word": false
436
  },
437
  {
438
- "content": "endé",
439
  "lstrip": false,
440
  "normalized": false,
441
  "rstrip": false,
442
  "single_word": false
443
  },
444
  {
445
- "content": "namo",
446
  "lstrip": false,
447
  "normalized": false,
448
  "rstrip": false,
@@ -526,7 +526,7 @@
526
  "single_word": false
527
  },
528
  {
529
- "content": "[SUBJECT_PREFIX:1ps]",
530
  "lstrip": false,
531
  "normalized": false,
532
  "rstrip": false,
@@ -540,21 +540,21 @@
540
  "single_word": false
541
  },
542
  {
543
- "content": "ixé",
544
  "lstrip": false,
545
  "normalized": false,
546
  "rstrip": false,
547
  "single_word": false
548
  },
549
  {
550
- "content": "pa",
551
  "lstrip": false,
552
  "normalized": false,
553
  "rstrip": false,
554
  "single_word": false
555
  },
556
  {
557
- "content": "ramo",
558
  "lstrip": false,
559
  "normalized": false,
560
  "rstrip": false,
@@ -610,7 +610,7 @@
610
  "single_word": false
611
  },
612
  {
613
- "content": "[OBJECT:2ps:SUBJECT_1P]",
614
  "lstrip": false,
615
  "normalized": false,
616
  "rstrip": false,
@@ -624,7 +624,7 @@
624
  "single_word": false
625
  },
626
  {
627
- "content": "oro",
628
  "lstrip": false,
629
  "normalized": false,
630
  "rstrip": false,
@@ -729,7 +729,7 @@
729
  "single_word": false
730
  },
731
  {
732
- "content": "s",
733
  "lstrip": false,
734
  "normalized": false,
735
  "rstrip": false,
 
63
  "rstrip": false,
64
  "single_word": false
65
  },
 
 
 
 
 
 
 
66
  {
67
  "content": "[SUBJECT:2pp]",
68
  "lstrip": false,
 
176
  "single_word": false
177
  },
178
  {
179
+ "content": "i",
180
  "lstrip": false,
181
  "normalized": false,
182
  "rstrip": false,
183
  "single_word": false
184
  },
185
  {
186
+ "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
190
  "single_word": false
191
  },
192
  {
193
+ "content": "[NEGATION_SUFFIX]",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
 
246
  "single_word": false
247
  },
248
  {
249
+ "content": "a",
250
  "lstrip": false,
251
  "normalized": false,
252
  "rstrip": false,
253
  "single_word": false
254
  },
255
  {
256
+ "content": "[SUBJECT:1ppe]",
257
  "lstrip": false,
258
  "normalized": false,
259
  "rstrip": false,
 
344
  "single_word": false
345
  },
346
  {
347
+ "content": "opo",
348
  "lstrip": false,
349
  "normalized": false,
350
  "rstrip": false,
351
  "single_word": false
352
  },
353
  {
354
+ "content": "nde",
355
  "lstrip": false,
356
  "normalized": false,
357
  "rstrip": false,
 
392
  "rstrip": false,
393
  "single_word": false
394
  },
395
+ {
396
+ "content": "s",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false
401
+ },
402
  {
403
  "content": "gûi",
404
  "lstrip": false,
 
428
  "single_word": false
429
  },
430
  {
431
+ "content": "namo",
432
  "lstrip": false,
433
  "normalized": false,
434
  "rstrip": false,
435
  "single_word": false
436
  },
437
  {
438
+ "content": "[GERUND_SUFFIX:CLASS_1:R]",
439
  "lstrip": false,
440
  "normalized": false,
441
  "rstrip": false,
442
  "single_word": false
443
  },
444
  {
445
+ "content": "endé",
446
  "lstrip": false,
447
  "normalized": false,
448
  "rstrip": false,
 
526
  "single_word": false
527
  },
528
  {
529
+ "content": "ixé",
530
  "lstrip": false,
531
  "normalized": false,
532
  "rstrip": false,
 
540
  "single_word": false
541
  },
542
  {
543
+ "content": "[SUBJECT_PREFIX:1ps]",
544
  "lstrip": false,
545
  "normalized": false,
546
  "rstrip": false,
547
  "single_word": false
548
  },
549
  {
550
+ "content": "ramo",
551
  "lstrip": false,
552
  "normalized": false,
553
  "rstrip": false,
554
  "single_word": false
555
  },
556
  {
557
+ "content": "pa",
558
  "lstrip": false,
559
  "normalized": false,
560
  "rstrip": false,
 
610
  "single_word": false
611
  },
612
  {
613
+ "content": "oro",
614
  "lstrip": false,
615
  "normalized": false,
616
  "rstrip": false,
 
624
  "single_word": false
625
  },
626
  {
627
+ "content": "[OBJECT:2ps:SUBJECT_1P]",
628
  "lstrip": false,
629
  "normalized": false,
630
  "rstrip": false,
 
729
  "single_word": false
730
  },
731
  {
732
+ "content": "peẽ",
733
  "lstrip": false,
734
  "normalized": false,
735
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -1692,7 +1692,6 @@
1692
  "[OBJECT:2pp]",
1693
  "ta",
1694
  "n'",
1695
- "peẽ",
1696
  "[SUBJECT:2pp]",
1697
  "îe",
1698
  "[GERUND_SUBJECT_PREFIX:3p]",
@@ -1709,9 +1708,9 @@
1709
  "[GERUND_SUFFIX:CLASS_1]",
1710
  "[GERUND_SUFFIX:CLASS_1:IYU]",
1711
  "îo",
1712
- "[NEGATION_SUFFIX]",
1713
  "i",
1714
  "[GERUND_SUBJECT_PREFIX:1ppe]",
 
1715
  "[SUBJECT:3p]",
1716
  "[IMPERATIVE_PREFIX:2pp]",
1717
  "́",
@@ -1719,8 +1718,8 @@
1719
  "a'e",
1720
  "t",
1721
  "[SUBJECT:1ps]",
1722
- "[SUBJECT:1ppe]",
1723
  "a",
 
1724
  "[SUBJECT_PREFIX:1ppe]",
1725
  "amo",
1726
  "[OBJECT:2ps]",
@@ -1733,20 +1732,21 @@
1733
  "[SUBJECT:1ppi]",
1734
  "[PERMISSIVE_PREFIX:VOWEL]",
1735
  "pe",
1736
- "nde",
1737
  "opo",
 
1738
  "[OBJECT_MARKER:3p:DEFAULT]",
1739
  "[GERUND_SUBJECT_PREFIX:2ps]",
1740
  "[OBJECT:3p]",
1741
  "îandé",
1742
  "[OBJECT:2pp:SUBJECT_1P]",
 
1743
  "gûi",
1744
  "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
1745
  "[OBJECT_MARKER:3p:MONOSYLLABIC]",
1746
  "peîepé",
 
1747
  "[GERUND_SUFFIX:CLASS_1:R]",
1748
  "endé",
1749
- "namo",
1750
  "[SUBJECT:2pp:OBJECT_1P]",
1751
  "mo",
1752
  "bo",
@@ -1758,11 +1758,11 @@
1758
  "[SUBJECT_PREFIX:3p]",
1759
  "r",
1760
  "[NEGATION_SUFFIX:VOWEL_ENDING]",
1761
- "[SUBJECT_PREFIX:1ps]",
1762
- "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
1763
  "ixé",
1764
- "pa",
 
1765
  "ramo",
 
1766
  "ere",
1767
  "[OBJECT:REFLEXIVE]",
1768
  "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
@@ -1770,9 +1770,9 @@
1770
  "[GERUND_SUFFIX:CLASS_1:B]",
1771
  "[OBJECT:3p:MONOSYLLABIC]",
1772
  "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
1773
- "[OBJECT:2ps:SUBJECT_1P]",
1774
- "[GERUND_SUBJECT_PREFIX:1ps]",
1775
  "oro",
 
 
1776
  "[PERMISSIVE_PREFIX:CONSONANT]",
1777
  "í",
1778
  "oré",
@@ -1787,7 +1787,7 @@
1787
  "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
1788
  "[SUBJECT_PREFIX:1ppi]",
1789
  "[SUBJECT_PREFIX:2ps]",
1790
- "s",
1791
  "[SPACE]"
1792
  ],
1793
  "clean_up_tokenization_spaces": true,
 
1692
  "[OBJECT:2pp]",
1693
  "ta",
1694
  "n'",
 
1695
  "[SUBJECT:2pp]",
1696
  "îe",
1697
  "[GERUND_SUBJECT_PREFIX:3p]",
 
1708
  "[GERUND_SUFFIX:CLASS_1]",
1709
  "[GERUND_SUFFIX:CLASS_1:IYU]",
1710
  "îo",
 
1711
  "i",
1712
  "[GERUND_SUBJECT_PREFIX:1ppe]",
1713
+ "[NEGATION_SUFFIX]",
1714
  "[SUBJECT:3p]",
1715
  "[IMPERATIVE_PREFIX:2pp]",
1716
  "́",
 
1718
  "a'e",
1719
  "t",
1720
  "[SUBJECT:1ps]",
 
1721
  "a",
1722
+ "[SUBJECT:1ppe]",
1723
  "[SUBJECT_PREFIX:1ppe]",
1724
  "amo",
1725
  "[OBJECT:2ps]",
 
1732
  "[SUBJECT:1ppi]",
1733
  "[PERMISSIVE_PREFIX:VOWEL]",
1734
  "pe",
 
1735
  "opo",
1736
+ "nde",
1737
  "[OBJECT_MARKER:3p:DEFAULT]",
1738
  "[GERUND_SUBJECT_PREFIX:2ps]",
1739
  "[OBJECT:3p]",
1740
  "îandé",
1741
  "[OBJECT:2pp:SUBJECT_1P]",
1742
+ "s",
1743
  "gûi",
1744
  "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
1745
  "[OBJECT_MARKER:3p:MONOSYLLABIC]",
1746
  "peîepé",
1747
+ "namo",
1748
  "[GERUND_SUFFIX:CLASS_1:R]",
1749
  "endé",
 
1750
  "[SUBJECT:2pp:OBJECT_1P]",
1751
  "mo",
1752
  "bo",
 
1758
  "[SUBJECT_PREFIX:3p]",
1759
  "r",
1760
  "[NEGATION_SUFFIX:VOWEL_ENDING]",
 
 
1761
  "ixé",
1762
+ "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
1763
+ "[SUBJECT_PREFIX:1ps]",
1764
  "ramo",
1765
+ "pa",
1766
  "ere",
1767
  "[OBJECT:REFLEXIVE]",
1768
  "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
 
1770
  "[GERUND_SUFFIX:CLASS_1:B]",
1771
  "[OBJECT:3p:MONOSYLLABIC]",
1772
  "[GERUND_SUFFIX:CLASS_1:CONSONANT]",
 
 
1773
  "oro",
1774
+ "[GERUND_SUBJECT_PREFIX:1ps]",
1775
+ "[OBJECT:2ps:SUBJECT_1P]",
1776
  "[PERMISSIVE_PREFIX:CONSONANT]",
1777
  "í",
1778
  "oré",
 
1787
  "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
1788
  "[SUBJECT_PREFIX:1ppi]",
1789
  "[SUBJECT_PREFIX:2ps]",
1790
+ "peẽ",
1791
  "[SPACE]"
1792
  ],
1793
  "clean_up_tokenization_spaces": true,