Train with Main verb Sub verb tags
Browse files
- added_tokens.json +2 -0
- config.json +1 -1
- model.safetensors +2 -2
- special_tokens_map.json +28 -14
- tokenizer_config.json +27 -9
added_tokens.json
CHANGED
@@ -121,6 +121,7 @@
|
|
121 |
"[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]": 32170,
|
122 |
"[IMPERATIVE_PREFIX:2pp]": 32144,
|
123 |
"[IMPERATIVE_PREFIX:2ps]": 32153,
|
|
|
124 |
"[NEGATION_PARTICLE:NA]": 32152,
|
125 |
"[NEGATION_PARTICLE:UME]": 32135,
|
126 |
"[NEGATION_PREFIX]": 32143,
|
@@ -164,6 +165,7 @@
|
|
164 |
"[SUBJECT_PREFIX:2pp]": 32119,
|
165 |
"[SUBJECT_PREFIX:2ps]": 32130,
|
166 |
"[SUBJECT_PREFIX:3p]": 32171,
|
|
|
167 |
"a'e": 32127,
|
168 |
"amo": 32104,
|
169 |
"bo": 32110,
|
|
|
121 |
"[GERUND_SUFFIX:CLASS_2:ORAL_VOWEL_ENDING]": 32170,
|
122 |
"[IMPERATIVE_PREFIX:2pp]": 32144,
|
123 |
"[IMPERATIVE_PREFIX:2ps]": 32153,
|
124 |
+
"[MAIN_VERB]": 32196,
|
125 |
"[NEGATION_PARTICLE:NA]": 32152,
|
126 |
"[NEGATION_PARTICLE:UME]": 32135,
|
127 |
"[NEGATION_PREFIX]": 32143,
|
|
|
165 |
"[SUBJECT_PREFIX:2pp]": 32119,
|
166 |
"[SUBJECT_PREFIX:2ps]": 32130,
|
167 |
"[SUBJECT_PREFIX:3p]": 32171,
|
168 |
+
"[SUB_VERB]": 32197,
|
169 |
"a'e": 32127,
|
170 |
"amo": 32104,
|
171 |
"bo": 32110,
|
config.json
CHANGED
@@ -57,5 +57,5 @@
|
|
57 |
"torch_dtype": "float32",
|
58 |
"transformers_version": "4.38.1",
|
59 |
"use_cache": true,
|
60 |
-
"vocab_size": 32196
|
61 |
}
|
|
|
57 |
"torch_dtype": "float32",
|
58 |
"transformers_version": "4.38.1",
|
59 |
"use_cache": true,
|
60 |
+
"vocab_size": 32198
|
61 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c8b330e8b3e8fbff4a45942c27fe14746e74b2cdbc5b4027ddca51fcbbf9c2b
|
3 |
+
size 242185256
|
special_tokens_map.json
CHANGED
@@ -119,6 +119,13 @@
|
|
119 |
"rstrip": false,
|
120 |
"single_word": false
|
121 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
{
|
123 |
"content": "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
|
124 |
"lstrip": false,
|
@@ -168,6 +175,13 @@
|
|
168 |
"rstrip": false,
|
169 |
"single_word": false
|
170 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
{
|
172 |
"content": "[GERUND_SUFFIX:CLASS_1:IYU]",
|
173 |
"lstrip": false,
|
@@ -190,14 +204,14 @@
|
|
190 |
"single_word": false
|
191 |
},
|
192 |
{
|
193 |
-
"content": "[NEGATION_SUFFIX]",
|
194 |
"lstrip": false,
|
195 |
"normalized": false,
|
196 |
"rstrip": false,
|
197 |
"single_word": false
|
198 |
},
|
199 |
{
|
200 |
-
"content": "[GERUND_SUBJECT_PREFIX:1ppe]",
|
201 |
"lstrip": false,
|
202 |
"normalized": false,
|
203 |
"rstrip": false,
|
@@ -246,7 +260,7 @@
|
|
246 |
"single_word": false
|
247 |
},
|
248 |
{
|
249 |
-
"content": "[SUBJECT:1ps]",
|
250 |
"lstrip": false,
|
251 |
"normalized": false,
|
252 |
"rstrip": false,
|
@@ -260,7 +274,7 @@
|
|
260 |
"single_word": false
|
261 |
},
|
262 |
{
|
263 |
-
"content": "[SUBJECT:1ppe]",
|
264 |
"lstrip": false,
|
265 |
"normalized": false,
|
266 |
"rstrip": false,
|
@@ -358,14 +372,14 @@
|
|
358 |
"single_word": false
|
359 |
},
|
360 |
{
|
361 |
-
"content": "opo",
|
362 |
"lstrip": false,
|
363 |
"normalized": false,
|
364 |
"rstrip": false,
|
365 |
"single_word": false
|
366 |
},
|
367 |
{
|
368 |
-
"content": "[OBJECT_MARKER:3p:DEFAULT]",
|
369 |
"lstrip": false,
|
370 |
"normalized": false,
|
371 |
"rstrip": false,
|
@@ -428,14 +442,14 @@
|
|
428 |
"single_word": false
|
429 |
},
|
430 |
{
|
431 |
-
"content": "endé",
|
432 |
"lstrip": false,
|
433 |
"normalized": false,
|
434 |
"rstrip": false,
|
435 |
"single_word": false
|
436 |
},
|
437 |
{
|
438 |
-
"content": "namo",
|
439 |
"lstrip": false,
|
440 |
"normalized": false,
|
441 |
"rstrip": false,
|
@@ -526,7 +540,7 @@
|
|
526 |
"single_word": false
|
527 |
},
|
528 |
{
|
529 |
-
"content": "ixé",
|
530 |
"lstrip": false,
|
531 |
"normalized": false,
|
532 |
"rstrip": false,
|
@@ -540,21 +554,21 @@
|
|
540 |
"single_word": false
|
541 |
},
|
542 |
{
|
543 |
-
"content": "[SUBJECT_PREFIX:1ps]",
|
544 |
"lstrip": false,
|
545 |
"normalized": false,
|
546 |
"rstrip": false,
|
547 |
"single_word": false
|
548 |
},
|
549 |
{
|
550 |
-
"content": "ramo",
|
551 |
"lstrip": false,
|
552 |
"normalized": false,
|
553 |
"rstrip": false,
|
554 |
"single_word": false
|
555 |
},
|
556 |
{
|
557 |
-
"content": "pa",
|
558 |
"lstrip": false,
|
559 |
"normalized": false,
|
560 |
"rstrip": false,
|
@@ -659,14 +673,14 @@
|
|
659 |
"single_word": false
|
660 |
},
|
661 |
{
|
662 |
-
"content": "[SUBJECT:2ps]",
|
663 |
"lstrip": false,
|
664 |
"normalized": false,
|
665 |
"rstrip": false,
|
666 |
"single_word": false
|
667 |
},
|
668 |
{
|
669 |
-
"content": "[PLURIFORM_PREFIX:R]",
|
670 |
"lstrip": false,
|
671 |
"normalized": false,
|
672 |
"rstrip": false,
|
|
|
119 |
"rstrip": false,
|
120 |
"single_word": false
|
121 |
},
|
122 |
+
{
|
123 |
+
"content": "[MAIN_VERB]",
|
124 |
+
"lstrip": false,
|
125 |
+
"normalized": false,
|
126 |
+
"rstrip": false,
|
127 |
+
"single_word": false
|
128 |
+
},
|
129 |
{
|
130 |
"content": "[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
|
131 |
"lstrip": false,
|
|
|
175 |
"rstrip": false,
|
176 |
"single_word": false
|
177 |
},
|
178 |
+
{
|
179 |
+
"content": "[SUB_VERB]",
|
180 |
+
"lstrip": false,
|
181 |
+
"normalized": false,
|
182 |
+
"rstrip": false,
|
183 |
+
"single_word": false
|
184 |
+
},
|
185 |
{
|
186 |
"content": "[GERUND_SUFFIX:CLASS_1:IYU]",
|
187 |
"lstrip": false,
|
|
|
204 |
"single_word": false
|
205 |
},
|
206 |
{
|
207 |
+
"content": "[GERUND_SUBJECT_PREFIX:1ppe]",
|
208 |
"lstrip": false,
|
209 |
"normalized": false,
|
210 |
"rstrip": false,
|
211 |
"single_word": false
|
212 |
},
|
213 |
{
|
214 |
+
"content": "[NEGATION_SUFFIX]",
|
215 |
"lstrip": false,
|
216 |
"normalized": false,
|
217 |
"rstrip": false,
|
|
|
260 |
"single_word": false
|
261 |
},
|
262 |
{
|
263 |
+
"content": "[SUBJECT:1ppe]",
|
264 |
"lstrip": false,
|
265 |
"normalized": false,
|
266 |
"rstrip": false,
|
|
|
274 |
"single_word": false
|
275 |
},
|
276 |
{
|
277 |
+
"content": "[SUBJECT:1ps]",
|
278 |
"lstrip": false,
|
279 |
"normalized": false,
|
280 |
"rstrip": false,
|
|
|
372 |
"single_word": false
|
373 |
},
|
374 |
{
|
375 |
+
"content": "[OBJECT_MARKER:3p:DEFAULT]",
|
376 |
"lstrip": false,
|
377 |
"normalized": false,
|
378 |
"rstrip": false,
|
379 |
"single_word": false
|
380 |
},
|
381 |
{
|
382 |
+
"content": "opo",
|
383 |
"lstrip": false,
|
384 |
"normalized": false,
|
385 |
"rstrip": false,
|
|
|
442 |
"single_word": false
|
443 |
},
|
444 |
{
|
445 |
+
"content": "namo",
|
446 |
"lstrip": false,
|
447 |
"normalized": false,
|
448 |
"rstrip": false,
|
449 |
"single_word": false
|
450 |
},
|
451 |
{
|
452 |
+
"content": "endé",
|
453 |
"lstrip": false,
|
454 |
"normalized": false,
|
455 |
"rstrip": false,
|
|
|
540 |
"single_word": false
|
541 |
},
|
542 |
{
|
543 |
+
"content": "[SUBJECT_PREFIX:1ps]",
|
544 |
"lstrip": false,
|
545 |
"normalized": false,
|
546 |
"rstrip": false,
|
|
|
554 |
"single_word": false
|
555 |
},
|
556 |
{
|
557 |
+
"content": "ixé",
|
558 |
"lstrip": false,
|
559 |
"normalized": false,
|
560 |
"rstrip": false,
|
561 |
"single_word": false
|
562 |
},
|
563 |
{
|
564 |
+
"content": "pa",
|
565 |
"lstrip": false,
|
566 |
"normalized": false,
|
567 |
"rstrip": false,
|
568 |
"single_word": false
|
569 |
},
|
570 |
{
|
571 |
+
"content": "ramo",
|
572 |
"lstrip": false,
|
573 |
"normalized": false,
|
574 |
"rstrip": false,
|
|
|
673 |
"single_word": false
|
674 |
},
|
675 |
{
|
676 |
+
"content": "[PLURIFORM_PREFIX:R]",
|
677 |
"lstrip": false,
|
678 |
"normalized": false,
|
679 |
"rstrip": false,
|
680 |
"single_word": false
|
681 |
},
|
682 |
{
|
683 |
+
"content": "[SUBJECT:2ps]",
|
684 |
"lstrip": false,
|
685 |
"normalized": false,
|
686 |
"rstrip": false,
|
tokenizer_config.json
CHANGED
@@ -1680,6 +1680,22 @@
|
|
1680 |
"rstrip": false,
|
1681 |
"single_word": false,
|
1682 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1683 |
}
|
1684 |
},
|
1685 |
"additional_special_tokens": [
|
@@ -1700,6 +1716,7 @@
|
|
1700 |
"[NEGATION_PARTICLE:NA]",
|
1701 |
"[IMPERATIVE_PREFIX:2ps]",
|
1702 |
"îos",
|
|
|
1703 |
"[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
|
1704 |
"[PLURIFORM_PREFIX:S]",
|
1705 |
"û",
|
@@ -1707,20 +1724,21 @@
|
|
1707 |
"[OBJECT:1ppi]",
|
1708 |
"abo",
|
1709 |
"[GERUND_SUFFIX:CLASS_1]",
|
|
|
1710 |
"[GERUND_SUFFIX:CLASS_1:IYU]",
|
1711 |
"îo",
|
1712 |
"i",
|
1713 |
-
"[NEGATION_SUFFIX]",
|
1714 |
"[GERUND_SUBJECT_PREFIX:1ppe]",
|
|
|
1715 |
"[SUBJECT:3p]",
|
1716 |
"[IMPERATIVE_PREFIX:2pp]",
|
1717 |
"́",
|
1718 |
"xe",
|
1719 |
"a'e",
|
1720 |
"t",
|
1721 |
-
"[SUBJECT:1ps]",
|
1722 |
-
"a",
|
1723 |
"[SUBJECT:1ppe]",
|
|
|
|
|
1724 |
"[SUBJECT_PREFIX:1ppe]",
|
1725 |
"amo",
|
1726 |
"[OBJECT:2ps]",
|
@@ -1734,8 +1752,8 @@
|
|
1734 |
"[PERMISSIVE_PREFIX:VOWEL]",
|
1735 |
"pe",
|
1736 |
"nde",
|
1737 |
-
"opo",
|
1738 |
"[OBJECT_MARKER:3p:DEFAULT]",
|
|
|
1739 |
"[GERUND_SUBJECT_PREFIX:2ps]",
|
1740 |
"[OBJECT:3p]",
|
1741 |
"îandé",
|
@@ -1744,8 +1762,8 @@
|
|
1744 |
"[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
|
1745 |
"[OBJECT_MARKER:3p:MONOSYLLABIC]",
|
1746 |
"peîepé",
|
1747 |
-
"endé",
|
1748 |
"namo",
|
|
|
1749 |
"[GERUND_SUFFIX:CLASS_1:R]",
|
1750 |
"[SUBJECT:2pp:OBJECT_1P]",
|
1751 |
"mo",
|
@@ -1758,11 +1776,11 @@
|
|
1758 |
"[SUBJECT_PREFIX:3p]",
|
1759 |
"r",
|
1760 |
"[NEGATION_SUFFIX:VOWEL_ENDING]",
|
1761 |
-
"ixé",
|
1762 |
-
"[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
|
1763 |
"[SUBJECT_PREFIX:1ps]",
|
1764 |
-
"ramo",
|
|
|
1765 |
"pa",
|
|
|
1766 |
"ere",
|
1767 |
"[OBJECT:REFLEXIVE]",
|
1768 |
"[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
|
@@ -1777,8 +1795,8 @@
|
|
1777 |
"[PERMISSIVE_PREFIX:CONSONANT]",
|
1778 |
"oré",
|
1779 |
"umẽ",
|
1780 |
-
"[SUBJECT:2ps]",
|
1781 |
"[PLURIFORM_PREFIX:R]",
|
|
|
1782 |
"o",
|
1783 |
"[SUBJECT:2ps:OBJECT_1P]",
|
1784 |
"îa",
|
|
|
1680 |
"rstrip": false,
|
1681 |
"single_word": false,
|
1682 |
"special": true
|
1683 |
+
},
|
1684 |
+
"32196": {
|
1685 |
+
"content": "[MAIN_VERB]",
|
1686 |
+
"lstrip": false,
|
1687 |
+
"normalized": false,
|
1688 |
+
"rstrip": false,
|
1689 |
+
"single_word": false,
|
1690 |
+
"special": true
|
1691 |
+
},
|
1692 |
+
"32197": {
|
1693 |
+
"content": "[SUB_VERB]",
|
1694 |
+
"lstrip": false,
|
1695 |
+
"normalized": false,
|
1696 |
+
"rstrip": false,
|
1697 |
+
"single_word": false,
|
1698 |
+
"special": true
|
1699 |
}
|
1700 |
},
|
1701 |
"additional_special_tokens": [
|
|
|
1716 |
"[NEGATION_PARTICLE:NA]",
|
1717 |
"[IMPERATIVE_PREFIX:2ps]",
|
1718 |
"îos",
|
1719 |
+
"[MAIN_VERB]",
|
1720 |
"[GERUND_SUFFIX:CLASS_1:NASAL_IYU]",
|
1721 |
"[PLURIFORM_PREFIX:S]",
|
1722 |
"û",
|
|
|
1724 |
"[OBJECT:1ppi]",
|
1725 |
"abo",
|
1726 |
"[GERUND_SUFFIX:CLASS_1]",
|
1727 |
+
"[SUB_VERB]",
|
1728 |
"[GERUND_SUFFIX:CLASS_1:IYU]",
|
1729 |
"îo",
|
1730 |
"i",
|
|
|
1731 |
"[GERUND_SUBJECT_PREFIX:1ppe]",
|
1732 |
+
"[NEGATION_SUFFIX]",
|
1733 |
"[SUBJECT:3p]",
|
1734 |
"[IMPERATIVE_PREFIX:2pp]",
|
1735 |
"́",
|
1736 |
"xe",
|
1737 |
"a'e",
|
1738 |
"t",
|
|
|
|
|
1739 |
"[SUBJECT:1ppe]",
|
1740 |
+
"a",
|
1741 |
+
"[SUBJECT:1ps]",
|
1742 |
"[SUBJECT_PREFIX:1ppe]",
|
1743 |
"amo",
|
1744 |
"[OBJECT:2ps]",
|
|
|
1752 |
"[PERMISSIVE_PREFIX:VOWEL]",
|
1753 |
"pe",
|
1754 |
"nde",
|
|
|
1755 |
"[OBJECT_MARKER:3p:DEFAULT]",
|
1756 |
+
"opo",
|
1757 |
"[GERUND_SUBJECT_PREFIX:2ps]",
|
1758 |
"[OBJECT:3p]",
|
1759 |
"îandé",
|
|
|
1762 |
"[CIRCUMSTANTIAL_SUFFIX:VOWEL_ENDING]",
|
1763 |
"[OBJECT_MARKER:3p:MONOSYLLABIC]",
|
1764 |
"peîepé",
|
|
|
1765 |
"namo",
|
1766 |
+
"endé",
|
1767 |
"[GERUND_SUFFIX:CLASS_1:R]",
|
1768 |
"[SUBJECT:2pp:OBJECT_1P]",
|
1769 |
"mo",
|
|
|
1776 |
"[SUBJECT_PREFIX:3p]",
|
1777 |
"r",
|
1778 |
"[NEGATION_SUFFIX:VOWEL_ENDING]",
|
|
|
|
|
1779 |
"[SUBJECT_PREFIX:1ps]",
|
1780 |
+
"[GERUND_SUFFIX:CLASS_1:ORAL_VOWEL]",
|
1781 |
+
"ixé",
|
1782 |
"pa",
|
1783 |
+
"ramo",
|
1784 |
"ere",
|
1785 |
"[OBJECT:REFLEXIVE]",
|
1786 |
"[GERUND_SUFFIX:CLASS_1:NASAL_VOWEL]",
|
|
|
1795 |
"[PERMISSIVE_PREFIX:CONSONANT]",
|
1796 |
"oré",
|
1797 |
"umẽ",
|
|
|
1798 |
"[PLURIFORM_PREFIX:R]",
|
1799 |
+
"[SUBJECT:2ps]",
|
1800 |
"o",
|
1801 |
"[SUBJECT:2ps:OBJECT_1P]",
|
1802 |
"îa",
|