kiansheik committed on
Commit
12f53bf
1 Parent(s): 17b8760

Train with Main verb Sub verb tags

Browse files
added_tokens.json CHANGED
@@ -121,6 +121,7 @@
121
  "[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]": 32170,
122
  "[IMPERATIVE_PREFIX:2pp]": 32144,
123
  "[IMPERATIVE_PREFIX:2ps]": 32153,
 
124
  "[NEGATION_PARTICLE:NA]": 32152,
125
  "[NEGATION_PARTICLE:UME]": 32135,
126
  "[NEGATION_PREFIX]": 32143,
@@ -164,6 +165,7 @@
164
  "[SUBJECT_PREFIX:2pp]": 32119,
165
  "[SUBJECT_PREFIX:2ps]": 32130,
166
  "[SUBJECT_PREFIX:3p]": 32171,
 
167
  "a'e": 32127,
168
  "amo": 32104,
169
  "bo": 32110,
 
121
  "[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]": 32170,
122
  "[IMPERATIVE_PREFIX:2pp]": 32144,
123
  "[IMPERATIVE_PREFIX:2ps]": 32153,
124
+ "[MAIN_VERB]": 32196,
125
  "[NEGATION_PARTICLE:NA]": 32152,
126
  "[NEGATION_PARTICLE:UME]": 32135,
127
  "[NEGATION_PREFIX]": 32143,
 
165
  "[SUBJECT_PREFIX:2pp]": 32119,
166
  "[SUBJECT_PREFIX:2ps]": 32130,
167
  "[SUBJECT_PREFIX:3p]": 32171,
168
+ "[SUB_VERB]": 32197,
169
  "a'e": 32127,
170
  "amo": 32104,
171
  "bo": 32110,
config.json CHANGED
@@ -57,5 +57,5 @@
57
  "torch_dtype": "float32",
58
  "transformers_version": "4.38.1",
59
  "use_cache": true,
60
- "vocab_size": 32196
61
  }
 
57
  "torch_dtype": "float32",
58
  "transformers_version": "4.38.1",
59
  "use_cache": true,
60
+ "vocab_size": 32198
61
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6429f08c9d32f73aca7ddb79ef42875ea88b53e600934f4c5dffe4b8ae5ac131
3
- size 242181160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c8b330e8b3e8fbff4a45942c27fe14746e74b2cdbc5b4027ddca51fcbbf9c2b
3
+ size 242185256
special_tokens_map.json CHANGED
@@ -119,6 +119,13 @@
119
  "rstrip": false,
120
  "single_word": false
121
  },
 
 
 
 
 
 
 
122
  {
123
  "content": "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
124
  "lstrip": false,
@@ -168,6 +175,13 @@
168
  "rstrip": false,
169
  "single_word": false
170
  },
 
 
 
 
 
 
 
171
  {
172
  "content": "[GERUND_SUFFIX:CLASS_1:IYU]",
173
  "lstrip": false,
@@ -190,14 +204,14 @@
190
  "single_word": false
191
  },
192
  {
193
- "content": "[NEGATION_SUFFIX]",
194
  "lstrip": false,
195
  "normalized": false,
196
  "rstrip": false,
197
  "single_word": false
198
  },
199
  {
200
- "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
201
  "lstrip": false,
202
  "normalized": false,
203
  "rstrip": false,
@@ -246,7 +260,7 @@
246
  "single_word": false
247
  },
248
  {
249
- "content": "[SUBJECT:1ps]",
250
  "lstrip": false,
251
  "normalized": false,
252
  "rstrip": false,
@@ -260,7 +274,7 @@
260
  "single_word": false
261
  },
262
  {
263
- "content": "[SUBJECT:1ppe]",
264
  "lstrip": false,
265
  "normalized": false,
266
  "rstrip": false,
@@ -358,14 +372,14 @@
358
  "single_word": false
359
  },
360
  {
361
- "content": "opo",
362
  "lstrip": false,
363
  "normalized": false,
364
  "rstrip": false,
365
  "single_word": false
366
  },
367
  {
368
- "content": "[OBJECT_MARKER:3p:DEFAULT]",
369
  "lstrip": false,
370
  "normalized": false,
371
  "rstrip": false,
@@ -428,14 +442,14 @@
428
  "single_word": false
429
  },
430
  {
431
- "content": "endé",
432
  "lstrip": false,
433
  "normalized": false,
434
  "rstrip": false,
435
  "single_word": false
436
  },
437
  {
438
- "content": "namo",
439
  "lstrip": false,
440
  "normalized": false,
441
  "rstrip": false,
@@ -526,7 +540,7 @@
526
  "single_word": false
527
  },
528
  {
529
- "content": "ixé",
530
  "lstrip": false,
531
  "normalized": false,
532
  "rstrip": false,
@@ -540,21 +554,21 @@
540
  "single_word": false
541
  },
542
  {
543
- "content": "[SUBJECT_PREFIX:1ps]",
544
  "lstrip": false,
545
  "normalized": false,
546
  "rstrip": false,
547
  "single_word": false
548
  },
549
  {
550
- "content": "ramo",
551
  "lstrip": false,
552
  "normalized": false,
553
  "rstrip": false,
554
  "single_word": false
555
  },
556
  {
557
- "content": "pa",
558
  "lstrip": false,
559
  "normalized": false,
560
  "rstrip": false,
@@ -659,14 +673,14 @@
659
  "single_word": false
660
  },
661
  {
662
- "content": "[SUBJECT:2ps]",
663
  "lstrip": false,
664
  "normalized": false,
665
  "rstrip": false,
666
  "single_word": false
667
  },
668
  {
669
- "content": "[PLURIFORM_PREFIX:R]",
670
  "lstrip": false,
671
  "normalized": false,
672
  "rstrip": false,
 
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
+ {
123
+ "content": "[MAIN_VERB]",
124
+ "lstrip": false,
125
+ "normalized": false,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
  {
130
  "content": "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
131
  "lstrip": false,
 
175
  "rstrip": false,
176
  "single_word": false
177
  },
178
+ {
179
+ "content": "[SUB_VERB]",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
  {
186
  "content": "[GERUND_SUFFIX:CLASS_1:IYU]",
187
  "lstrip": false,
 
204
  "single_word": false
205
  },
206
  {
207
+ "content": "[GERUND_SUBJECT_PREFIX:1ppe]",
208
  "lstrip": false,
209
  "normalized": false,
210
  "rstrip": false,
211
  "single_word": false
212
  },
213
  {
214
+ "content": "[NEGATION_SUFFIX]",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
 
260
  "single_word": false
261
  },
262
  {
263
+ "content": "[SUBJECT:1ppe]",
264
  "lstrip": false,
265
  "normalized": false,
266
  "rstrip": false,
 
274
  "single_word": false
275
  },
276
  {
277
+ "content": "[SUBJECT:1ps]",
278
  "lstrip": false,
279
  "normalized": false,
280
  "rstrip": false,
 
372
  "single_word": false
373
  },
374
  {
375
+ "content": "[OBJECT_MARKER:3p:DEFAULT]",
376
  "lstrip": false,
377
  "normalized": false,
378
  "rstrip": false,
379
  "single_word": false
380
  },
381
  {
382
+ "content": "opo",
383
  "lstrip": false,
384
  "normalized": false,
385
  "rstrip": false,
 
442
  "single_word": false
443
  },
444
  {
445
+ "content": "namo",
446
  "lstrip": false,
447
  "normalized": false,
448
  "rstrip": false,
449
  "single_word": false
450
  },
451
  {
452
+ "content": "endé",
453
  "lstrip": false,
454
  "normalized": false,
455
  "rstrip": false,
 
540
  "single_word": false
541
  },
542
  {
543
+ "content": "[SUBJECT_PREFIX:1ps]",
544
  "lstrip": false,
545
  "normalized": false,
546
  "rstrip": false,
 
554
  "single_word": false
555
  },
556
  {
557
+ "content": "ixé",
558
  "lstrip": false,
559
  "normalized": false,
560
  "rstrip": false,
561
  "single_word": false
562
  },
563
  {
564
+ "content": "pa",
565
  "lstrip": false,
566
  "normalized": false,
567
  "rstrip": false,
568
  "single_word": false
569
  },
570
  {
571
+ "content": "ramo",
572
  "lstrip": false,
573
  "normalized": false,
574
  "rstrip": false,
 
673
  "single_word": false
674
  },
675
  {
676
+ "content": "[PLURIFORM_PREFIX:R]",
677
  "lstrip": false,
678
  "normalized": false,
679
  "rstrip": false,
680
  "single_word": false
681
  },
682
  {
683
+ "content": "[SUBJECT:2ps]",
684
  "lstrip": false,
685
  "normalized": false,
686
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -1680,6 +1680,22 @@
1680
  "rstrip": false,
1681
  "single_word": false,
1682
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1683
  }
1684
  },
1685
  "additional_special_tokens": [
@@ -1700,6 +1716,7 @@
1700
  "[NEGATION_PARTICLE:NA]",
1701
  "[IMPERATIVE_PREFIX:2ps]",
1702
  "îos",
 
1703
  "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
1704
  "[PLURIFORM_PREFIX:S]",
1705
  "û",
@@ -1707,20 +1724,21 @@
1707
  "[OBJECT:1ppi]",
1708
  "abo",
1709
  "[GERUND_SUFFIX:CLASS_1]",
 
1710
  "[GERUND_SUFFIX:CLASS_1:IYU]",
1711
  "îo",
1712
  "i",
1713
- "[NEGATION_SUFFIX]",
1714
  "[GERUND_SUBJECT_PREFIX:1ppe]",
 
1715
  "[SUBJECT:3p]",
1716
  "[IMPERATIVE_PREFIX:2pp]",
1717
  "́",
1718
  "xe",
1719
  "a'e",
1720
  "t",
1721
- "[SUBJECT:1ps]",
1722
- "a",
1723
  "[SUBJECT:1ppe]",
 
 
1724
  "[SUBJECT_PREFIX:1ppe]",
1725
  "amo",
1726
  "[OBJECT:2ps]",
@@ -1734,8 +1752,8 @@
1734
  "[PERMISSIVE_PREFIX:VOWEL]",
1735
  "pe",
1736
  "nde",
1737
- "opo",
1738
  "[OBJECT_MARKER:3p:DEFAULT]",
 
1739
  "[GERUND_SUBJECT_PREFIX:2ps]",
1740
  "[OBJECT:3p]",
1741
  "îandé",
@@ -1744,8 +1762,8 @@
1744
  "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
1745
  "[OBJECT_MARKER:3p:MONOSYLLABIC]",
1746
  "peîepé",
1747
- "endé",
1748
  "namo",
 
1749
  "[GERUND_SUFFIX:CLASS_1:R]",
1750
  "[SUBJECT:2pp:OBJECT_1P]",
1751
  "mo",
@@ -1758,11 +1776,11 @@
1758
  "[SUBJECT_PREFIX:3p]",
1759
  "r",
1760
  "[NEGATION_SUFFIX:VOWEL_ENDING]",
1761
- "ixé",
1762
- "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
1763
  "[SUBJECT_PREFIX:1ps]",
1764
- "ramo",
 
1765
  "pa",
 
1766
  "ere",
1767
  "[OBJECT:REFLEXIVE]",
1768
  "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
@@ -1777,8 +1795,8 @@
1777
  "[PERMISSIVE_PREFIX:CONSONANT]",
1778
  "oré",
1779
  "umẽ",
1780
- "[SUBJECT:2ps]",
1781
  "[PLURIFORM_PREFIX:R]",
 
1782
  "o",
1783
  "[SUBJECT:2ps:OBJECT_1P]",
1784
  "îa",
 
1680
  "rstrip": false,
1681
  "single_word": false,
1682
  "special": true
1683
+ },
1684
+ "32196": {
1685
+ "content": "[MAIN_VERB]",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "32197": {
1693
+ "content": "[SUB_VERB]",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
  }
1700
  },
1701
  "additional_special_tokens": [
 
1716
  "[NEGATION_PARTICLE:NA]",
1717
  "[IMPERATIVE_PREFIX:2ps]",
1718
  "îos",
1719
+ "[MAIN_VERB]",
1720
  "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
1721
  "[PLURIFORM_PREFIX:S]",
1722
  "û",
 
1724
  "[OBJECT:1ppi]",
1725
  "abo",
1726
  "[GERUND_SUFFIX:CLASS_1]",
1727
+ "[SUB_VERB]",
1728
  "[GERUND_SUFFIX:CLASS_1:IYU]",
1729
  "îo",
1730
  "i",
 
1731
  "[GERUND_SUBJECT_PREFIX:1ppe]",
1732
+ "[NEGATION_SUFFIX]",
1733
  "[SUBJECT:3p]",
1734
  "[IMPERATIVE_PREFIX:2pp]",
1735
  "́",
1736
  "xe",
1737
  "a'e",
1738
  "t",
 
 
1739
  "[SUBJECT:1ppe]",
1740
+ "a",
1741
+ "[SUBJECT:1ps]",
1742
  "[SUBJECT_PREFIX:1ppe]",
1743
  "amo",
1744
  "[OBJECT:2ps]",
 
1752
  "[PERMISSIVE_PREFIX:VOWEL]",
1753
  "pe",
1754
  "nde",
 
1755
  "[OBJECT_MARKER:3p:DEFAULT]",
1756
+ "opo",
1757
  "[GERUND_SUBJECT_PREFIX:2ps]",
1758
  "[OBJECT:3p]",
1759
  "îandé",
 
1762
  "[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
1763
  "[OBJECT_MARKER:3p:MONOSYLLABIC]",
1764
  "peîepé",
 
1765
  "namo",
1766
+ "endé",
1767
  "[GERUND_SUFFIX:CLASS_1:R]",
1768
  "[SUBJECT:2pp:OBJECT_1P]",
1769
  "mo",
 
1776
  "[SUBJECT_PREFIX:3p]",
1777
  "r",
1778
  "[NEGATION_SUFFIX:VOWEL_ENDING]",
 
 
1779
  "[SUBJECT_PREFIX:1ps]",
1780
+ "[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
1781
+ "ixé",
1782
  "pa",
1783
+ "ramo",
1784
  "ere",
1785
  "[OBJECT:REFLEXIVE]",
1786
  "[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
 
1795
  "[PERMISSIVE_PREFIX:CONSONANT]",
1796
  "oré",
1797
  "umẽ",
 
1798
  "[PLURIFORM_PREFIX:R]",
1799
+ "[SUBJECT:2ps]",
1800
  "o",
1801
  "[SUBJECT:2ps:OBJECT_1P]",
1802
  "îa",