kiansheik commited on
Commit
17b8760
1 Parent(s): 675c6bf

Complex Clauses

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d06e6b690a50fd664766b6bd2cc3a5a5f0c7a2e067b6936578225046a36ad21
3
  size 242181160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6429f08c9d32f73aca7ddb79ef42875ea88b53e600934f4c5dffe4b8ae5ac131
3
  size 242181160
special_tokens_map.json CHANGED
@@ -63,6 +63,13 @@
63
  "rstrip": false,
64
  "single_word": false
65
  },
 
 
 
 
 
 
 
66
  {
67
  "content": "[SUBJECT:2pp]",
68
  "lstrip": false,
@@ -183,14 +190,14 @@
183
  "single_word": false
184
  },
185
  {
186
- "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
190
  "single_word": false
191
  },
192
  {
193
- "content": "[NEGATION_SUFFIX]",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
@@ -344,14 +351,14 @@
344
  "single_word": false
345
  },
346
  {
347
- "content": "opo",
348
  "lstrip": false,
349
  "normalized": false,
350
  "rstrip": false,
351
  "single_word": false
352
  },
353
  {
354
- "content": "nde",
355
  "lstrip": false,
356
  "normalized": false,
357
  "rstrip": false,
@@ -386,21 +393,14 @@
386
  "single_word": false
387
  },
388
  {
389
- "content": "[OBJECT:2pp:SUBJECT_1P]",
390
- "lstrip": false,
391
- "normalized": false,
392
- "rstrip": false,
393
- "single_word": false
394
- },
395
- {
396
- "content": "s",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
400
  "single_word": false
401
  },
402
  {
403
- "content": "gûi",
404
  "lstrip": false,
405
  "normalized": false,
406
  "rstrip": false,
@@ -428,21 +428,21 @@
428
  "single_word": false
429
  },
430
  {
431
- "content": "namo",
432
  "lstrip": false,
433
  "normalized": false,
434
  "rstrip": false,
435
  "single_word": false
436
  },
437
  {
438
- "content": "[GERUND_SUFFIX:CLASS_1:R]",
439
  "lstrip": false,
440
  "normalized": false,
441
  "rstrip": false,
442
  "single_word": false
443
  },
444
  {
445
- "content": "endé",
446
  "lstrip": false,
447
  "normalized": false,
448
  "rstrip": false,
@@ -631,14 +631,14 @@
631
  "single_word": false
632
  },
633
  {
634
- "content": "[PERMISSIVE_PREFIX:CONSONANT]",
635
  "lstrip": false,
636
  "normalized": false,
637
  "rstrip": false,
638
  "single_word": false
639
  },
640
  {
641
- "content": "í",
642
  "lstrip": false,
643
  "normalized": false,
644
  "rstrip": false,
@@ -729,7 +729,7 @@
729
  "single_word": false
730
  },
731
  {
732
- "content": "peẽ",
733
  "lstrip": false,
734
  "normalized": false,
735
  "rstrip": false,
 
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
+ {
67
+ "content": "peẽ",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
  {
74
  "content": "[SUBJECT:2pp]",
75
  "lstrip": false,
 
190
  "single_word": false
191
  },
192
  {
193
+ "content": "[NEGATION_SUFFIX]",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
197
  "single_word": false
198
  },
199
  {
200
+ "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
 
351
  "single_word": false
352
  },
353
  {
354
+ "content": "nde",
355
  "lstrip": false,
356
  "normalized": false,
357
  "rstrip": false,
358
  "single_word": false
359
  },
360
  {
361
+ "content": "opo",
362
  "lstrip": false,
363
  "normalized": false,
364
  "rstrip": false,
 
393
  "single_word": false
394
  },
395
  {
396
+ "content": "gûi",
 
 
 
 
 
 
 
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
400
  "single_word": false
401
  },
402
  {
403
+ "content": "[OBJECT:2pp:SUBJECT_1P]",
404
  "lstrip": false,
405
  "normalized": false,
406
  "rstrip": false,
 
428
  "single_word": false
429
  },
430
  {
431
+ "content": "endé",
432
  "lstrip": false,
433
  "normalized": false,
434
  "rstrip": false,
435
  "single_word": false
436
  },
437
  {
438
+ "content": "namo",
439
  "lstrip": false,
440
  "normalized": false,
441
  "rstrip": false,
442
  "single_word": false
443
  },
444
  {
445
+ "content": "[GERUND_SUFFIX:CLASS_1:R]",
446
  "lstrip": false,
447
  "normalized": false,
448
  "rstrip": false,
 
631
  "single_word": false
632
  },
633
  {
634
+ "content": "í",
635
  "lstrip": false,
636
  "normalized": false,
637
  "rstrip": false,
638
  "single_word": false
639
  },
640
  {
641
+ "content": "[PERMISSIVE_PREFIX:CONSONANT]",
642
  "lstrip": false,
643
  "normalized": false,
644
  "rstrip": false,
 
729
  "single_word": false
730
  },
731
  {
732
+ "content": "s",
733
  "lstrip": false,
734
  "normalized": false,
735
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -1692,6 +1692,7 @@
1692
  "[OBJECT:2pp]",
1693
  "ta",
1694
  "n'",
 
1695
  "[SUBJECT:2pp]",
1696
  "îe",
1697
  "[GERUND_SUBJECT_PREFIX:3p]",
@@ -1709,8 +1710,8 @@
1709
  "[GERUND_SUFFIX:CLASS_1:IYU]",
1710
  "îo",
1711
  "i",
1712
- "[GERUND_SUBJECT_PREFIX:1ppe]",
1713
  "[NEGATION_SUFFIX]",
 
1714
  "[SUBJECT:3p]",
1715
  "[IMPERATIVE_PREFIX:2pp]",
1716
  "́",
@@ -1732,21 +1733,20 @@
1732
  "[SUBJECT:1ppi]",
1733
  "[PERMISSIVE_PREFIX:VOWEL]",
1734
  "pe",
1735
- "opo",
1736
  "nde",
 
1737
  "[OBJECT_MARKER:3p:DEFAULT]",
1738
  "[GERUND_SUBJECT_PREFIX:2ps]",
1739
  "[OBJECT:3p]",
1740
  "îandé",
1741
- "[OBJECT:2pp:SUBJECT_1P]",
1742
- "s",
1743
  "gûi",
 
1744
  "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
1745
  "[OBJECT_MARKER:3p:MONOSYLLABIC]",
1746
  "peîepé",
 
1747
  "namo",
1748
  "[GERUND_SUFFIX:CLASS_1:R]",
1749
- "endé",
1750
  "[SUBJECT:2pp:OBJECT_1P]",
1751
  "mo",
1752
  "bo",
@@ -1773,8 +1773,8 @@
1773
  "oro",
1774
  "[GERUND_SUBJECT_PREFIX:1ps]",
1775
  "[OBJECT:2ps:SUBJECT_1P]",
1776
- "[PERMISSIVE_PREFIX:CONSONANT]",
1777
  "í",
 
1778
  "oré",
1779
  "umẽ",
1780
  "[SUBJECT:2ps]",
@@ -1787,7 +1787,7 @@
1787
  "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
1788
  "[SUBJECT_PREFIX:1ppi]",
1789
  "[SUBJECT_PREFIX:2ps]",
1790
- "peẽ",
1791
  "[SPACE]"
1792
  ],
1793
  "clean_up_tokenization_spaces": true,
 
1692
  "[OBJECT:2pp]",
1693
  "ta",
1694
  "n'",
1695
+ "peẽ",
1696
  "[SUBJECT:2pp]",
1697
  "îe",
1698
  "[GERUND_SUBJECT_PREFIX:3p]",
 
1710
  "[GERUND_SUFFIX:CLASS_1:IYU]",
1711
  "îo",
1712
  "i",
 
1713
  "[NEGATION_SUFFIX]",
1714
+ "[GERUND_SUBJECT_PREFIX:1ppe]",
1715
  "[SUBJECT:3p]",
1716
  "[IMPERATIVE_PREFIX:2pp]",
1717
  "́",
 
1733
  "[SUBJECT:1ppi]",
1734
  "[PERMISSIVE_PREFIX:VOWEL]",
1735
  "pe",
 
1736
  "nde",
1737
+ "opo",
1738
  "[OBJECT_MARKER:3p:DEFAULT]",
1739
  "[GERUND_SUBJECT_PREFIX:2ps]",
1740
  "[OBJECT:3p]",
1741
  "îandé",
 
 
1742
  "gûi",
1743
+ "[OBJECT:2pp:SUBJECT_1P]",
1744
  "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
1745
  "[OBJECT_MARKER:3p:MONOSYLLABIC]",
1746
  "peîepé",
1747
+ "endé",
1748
  "namo",
1749
  "[GERUND_SUFFIX:CLASS_1:R]",
 
1750
  "[SUBJECT:2pp:OBJECT_1P]",
1751
  "mo",
1752
  "bo",
 
1773
  "oro",
1774
  "[GERUND_SUBJECT_PREFIX:1ps]",
1775
  "[OBJECT:2ps:SUBJECT_1P]",
 
1776
  "í",
1777
+ "[PERMISSIVE_PREFIX:CONSONANT]",
1778
  "oré",
1779
  "umẽ",
1780
  "[SUBJECT:2ps]",
 
1787
  "[GERUND_SUFFIX:CLASS_2:NASAL_VOWEL_ENDING]",
1788
  "[SUBJECT_PREFIX:1ppi]",
1789
  "[SUBJECT_PREFIX:2ps]",
1790
+ "s",
1791
  "[SPACE]"
1792
  ],
1793
  "clean_up_tokenization_spaces": true,