KoichiYasuoka committed on
Commit
74fd357
1 Parent(s): fab470f

model improved

Browse files
Files changed (4) hide show
  1. config.json +144 -159
  2. pytorch_model.bin +2 -2
  3. supar.model +2 -2
  4. tokenizer.json +0 -0
config.json CHANGED
@@ -27,26 +27,26 @@
27
  "15": "B-NOUN",
28
  "16": "B-NOUN+ADP",
29
  "17": "B-NOUN+ADP+NOUN",
30
- "18": "B-NOUN+ADV",
31
- "19": "B-NOUN+NOUN",
32
- "20": "B-NOUN+VERB",
33
- "21": "B-NUM",
34
- "22": "B-NUM+NOUN",
35
- "23": "B-PART",
36
- "24": "B-PART+NOUN",
37
- "25": "B-PART+VERB",
38
- "26": "B-PRON",
39
- "27": "B-PROPN",
40
- "28": "B-PUNCT",
41
- "29": "B-SCONJ",
42
- "30": "B-SCONJ+ADV",
43
- "31": "B-VERB",
44
- "32": "B-VERB+AUX",
45
- "33": "B-VERB+NOUN",
46
- "34": "B-VERB+PART",
47
- "35": "B-VERB+SCONJ",
48
- "36": "B-VERT",
49
- "37": "B-X",
50
  "38": "CCONJ",
51
  "39": "DET",
52
  "40": "DET+NOUN",
@@ -65,47 +65,45 @@
65
  "53": "I-NOUN",
66
  "54": "I-NOUN+ADP",
67
  "55": "I-NOUN+ADP+NOUN",
68
- "56": "I-NOUN+ADV",
69
- "57": "I-NOUN+NOUN",
70
- "58": "I-NOUN+VERB",
71
- "59": "I-NUM",
72
- "60": "I-NUM+NOUN",
73
- "61": "I-PART",
74
- "62": "I-PART+NOUN",
75
- "63": "I-PART+VERB",
76
- "64": "I-PRON",
77
- "65": "I-PROPN",
78
- "66": "I-PUNCT",
79
- "67": "I-SCONJ",
80
- "68": "I-SCONJ+ADV",
81
- "69": "I-VERB",
82
- "70": "I-VERB+AUX",
83
- "71": "I-VERB+NOUN",
84
- "72": "I-VERB+PART",
85
- "73": "I-VERB+SCONJ",
86
- "74": "I-VERT",
87
- "75": "I-X",
88
  "76": "INTJ",
89
  "77": "NOUN",
90
  "78": "NOUN+ADP",
91
  "79": "NOUN+NOUN",
92
  "80": "NOUN+VERB",
93
  "81": "NUM",
94
- "82": "NUM+VERB+NOUN",
95
- "83": "PART",
96
- "84": "PART+NOUN",
97
- "85": "PART+VERB",
98
- "86": "PROPN",
99
- "87": "PUNCT",
100
- "88": "SCONJ",
101
- "89": "SYM",
102
- "90": "VERB",
103
- "91": "VERB+AUX",
104
- "92": "VERB+NOUN",
105
- "93": "VERB+PART",
106
- "94": "VERB+VERB",
107
- "95": "VERT",
108
- "96": "X"
109
  },
110
  "initializer_range": 0.02,
111
  "intermediate_size": 3072,
@@ -128,26 +126,26 @@
128
  "B-NOUN": 15,
129
  "B-NOUN+ADP": 16,
130
  "B-NOUN+ADP+NOUN": 17,
131
- "B-NOUN+ADV": 18,
132
- "B-NOUN+NOUN": 19,
133
- "B-NOUN+VERB": 20,
134
- "B-NUM": 21,
135
- "B-NUM+NOUN": 22,
136
- "B-PART": 23,
137
- "B-PART+NOUN": 24,
138
- "B-PART+VERB": 25,
139
- "B-PRON": 26,
140
- "B-PROPN": 27,
141
- "B-PUNCT": 28,
142
- "B-SCONJ": 29,
143
- "B-SCONJ+ADV": 30,
144
- "B-VERB": 31,
145
- "B-VERB+AUX": 32,
146
- "B-VERB+NOUN": 33,
147
- "B-VERB+PART": 34,
148
- "B-VERB+SCONJ": 35,
149
- "B-VERT": 36,
150
- "B-X": 37,
151
  "CCONJ": 38,
152
  "DET": 39,
153
  "DET+NOUN": 40,
@@ -166,47 +164,45 @@
166
  "I-NOUN": 53,
167
  "I-NOUN+ADP": 54,
168
  "I-NOUN+ADP+NOUN": 55,
169
- "I-NOUN+ADV": 56,
170
- "I-NOUN+NOUN": 57,
171
- "I-NOUN+VERB": 58,
172
- "I-NUM": 59,
173
- "I-NUM+NOUN": 60,
174
- "I-PART": 61,
175
- "I-PART+NOUN": 62,
176
- "I-PART+VERB": 63,
177
- "I-PRON": 64,
178
- "I-PROPN": 65,
179
- "I-PUNCT": 66,
180
- "I-SCONJ": 67,
181
- "I-SCONJ+ADV": 68,
182
- "I-VERB": 69,
183
- "I-VERB+AUX": 70,
184
- "I-VERB+NOUN": 71,
185
- "I-VERB+PART": 72,
186
- "I-VERB+SCONJ": 73,
187
- "I-VERT": 74,
188
- "I-X": 75,
189
  "INTJ": 76,
190
  "NOUN": 77,
191
  "NOUN+ADP": 78,
192
  "NOUN+NOUN": 79,
193
  "NOUN+VERB": 80,
194
  "NUM": 81,
195
- "NUM+VERB+NOUN": 82,
196
- "PART": 83,
197
- "PART+NOUN": 84,
198
- "PART+VERB": 85,
199
- "PROPN": 86,
200
- "PUNCT": 87,
201
- "SCONJ": 88,
202
- "SYM": 89,
203
- "VERB": 90,
204
- "VERB+AUX": 91,
205
- "VERB+NOUN": 92,
206
- "VERB+PART": 93,
207
- "VERB+VERB": 94,
208
- "VERT": 95,
209
- "X": 96
210
  },
211
  "layer_norm_eps": 1e-07,
212
  "max_position_embeddings": 512,
@@ -298,18 +294,22 @@
298
  "Oro",
299
  "wano"
300
  ],
301
- "Pet-samaketa": [
302
- "Pet-samake",
303
  "ta"
304
  ],
305
- "Shoita": [
306
- "Shoi",
307
  "ta"
308
  ],
309
  "Soita": [
310
  "Soi",
311
  "ta"
312
  ],
 
 
 
 
313
  "keseta": [
314
  "kese",
315
  "ta"
@@ -326,6 +326,10 @@
326
  "oro",
327
  "wano"
328
  ],
 
 
 
 
329
  "otta": [
330
  "ot",
331
  "ta"
@@ -334,10 +338,6 @@
334
  "samake",
335
  "ta"
336
  ],
337
- "shoita": [
338
- "shoi",
339
- "ta"
340
- ],
341
  "soyta": [
342
  "soy",
343
  "ta"
@@ -363,6 +363,13 @@
363
  "puray"
364
  ]
365
  },
 
 
 
 
 
 
 
366
  "NOUN+ADV": {
367
  "Tambeta ne": [
368
  "Tambe",
@@ -479,33 +486,19 @@
479
  "to"
480
  ]
481
  },
482
- "NUM+VERB+NOUN": {
483
- "Shineanto": [
484
- "Shine",
485
- "an",
486
- "to"
487
- ],
488
- "sineanto": [
489
- "sine",
490
- "an",
491
- "to"
492
  ]
493
  },
494
  "PART+NOUN": {
495
  "=anpe": [
496
  "=an",
497
  "pe"
498
- ],
499
- "shichorpok": [
500
- "shi",
501
- "chorpok"
502
  ]
503
  },
504
  "PART+VERB": {
505
- "Chirushka": [
506
- "Chi",
507
- "rushka"
508
- ],
509
  "ainu-wap": [
510
  "a",
511
  "inu-wap"
@@ -529,10 +522,6 @@
529
  "karapa": [
530
  "k",
531
  "arapa"
532
- ],
533
- "shiokote": [
534
- "shi",
535
- "okote"
536
  ]
537
  },
538
  "SCONJ+ADV": {
@@ -542,14 +531,6 @@
542
  ]
543
  },
544
  "VERB+AUX": {
545
- "poppeta ashinnangoro": [
546
- "poppeta ashin",
547
- "nangoro"
548
- ],
549
- "poppetaasinnankor": [
550
- "poppetaasin",
551
- "nankor"
552
- ],
553
  "sattek": [
554
  "sat",
555
  "tek"
@@ -568,13 +549,9 @@
568
  "an",
569
  "pe"
570
  ],
571
- "ashbe": [
572
- "ash",
573
- "be"
574
- ],
575
- "aspe": [
576
- "as",
577
- "pe"
578
  ],
579
  "h\u00e9sep\u00e1ha": [
580
  "h\u00e9se",
@@ -584,9 +561,9 @@
584
  "kar",
585
  "i"
586
  ],
587
- "ohasiri": [
588
- "oha",
589
- "siri"
590
  ],
591
  "wenpuri": [
592
  "wen",
@@ -598,9 +575,17 @@
598
  "kar",
599
  "i"
600
  ],
 
 
 
 
601
  "sapash": [
602
  "sap",
603
  "ash"
 
 
 
 
604
  ]
605
  },
606
  "VERB+SCONJ": {
@@ -629,5 +614,5 @@
629
  "torch_dtype": "float32",
630
  "transformers_version": "4.22.1",
631
  "type_vocab_size": 0,
632
- "vocab_size": 5093
633
  }
 
27
  "15": "B-NOUN",
28
  "16": "B-NOUN+ADP",
29
  "17": "B-NOUN+ADP+NOUN",
30
+ "18": "B-NOUN+ADP+VERB",
31
+ "19": "B-NOUN+ADV",
32
+ "20": "B-NOUN+NOUN",
33
+ "21": "B-NOUN+VERB",
34
+ "22": "B-NUM",
35
+ "23": "B-NUM+NOUN",
36
+ "24": "B-PART",
37
+ "25": "B-PART+AUX",
38
+ "26": "B-PART+NOUN",
39
+ "27": "B-PART+VERB",
40
+ "28": "B-PRON",
41
+ "29": "B-PROPN",
42
+ "30": "B-PUNCT",
43
+ "31": "B-SCONJ",
44
+ "32": "B-SCONJ+ADV",
45
+ "33": "B-VERB",
46
+ "34": "B-VERB+NOUN",
47
+ "35": "B-VERB+PART",
48
+ "36": "B-VERB+SCONJ",
49
+ "37": "B-VERT",
50
  "38": "CCONJ",
51
  "39": "DET",
52
  "40": "DET+NOUN",
 
65
  "53": "I-NOUN",
66
  "54": "I-NOUN+ADP",
67
  "55": "I-NOUN+ADP+NOUN",
68
+ "56": "I-NOUN+ADP+VERB",
69
+ "57": "I-NOUN+ADV",
70
+ "58": "I-NOUN+NOUN",
71
+ "59": "I-NOUN+VERB",
72
+ "60": "I-NUM",
73
+ "61": "I-NUM+NOUN",
74
+ "62": "I-PART",
75
+ "63": "I-PART+AUX",
76
+ "64": "I-PART+NOUN",
77
+ "65": "I-PART+VERB",
78
+ "66": "I-PRON",
79
+ "67": "I-PROPN",
80
+ "68": "I-PUNCT",
81
+ "69": "I-SCONJ",
82
+ "70": "I-SCONJ+ADV",
83
+ "71": "I-VERB",
84
+ "72": "I-VERB+NOUN",
85
+ "73": "I-VERB+PART",
86
+ "74": "I-VERB+SCONJ",
87
+ "75": "I-VERT",
88
  "76": "INTJ",
89
  "77": "NOUN",
90
  "78": "NOUN+ADP",
91
  "79": "NOUN+NOUN",
92
  "80": "NOUN+VERB",
93
  "81": "NUM",
94
+ "82": "PART",
95
+ "83": "PART+VERB",
96
+ "84": "PROPN",
97
+ "85": "PUNCT",
98
+ "86": "SCONJ",
99
+ "87": "SYM",
100
+ "88": "VERB",
101
+ "89": "VERB+AUX",
102
+ "90": "VERB+NOUN",
103
+ "91": "VERB+PART",
104
+ "92": "VERB+VERB",
105
+ "93": "VERT",
106
+ "94": "X"
 
 
107
  },
108
  "initializer_range": 0.02,
109
  "intermediate_size": 3072,
 
126
  "B-NOUN": 15,
127
  "B-NOUN+ADP": 16,
128
  "B-NOUN+ADP+NOUN": 17,
129
+ "B-NOUN+ADP+VERB": 18,
130
+ "B-NOUN+ADV": 19,
131
+ "B-NOUN+NOUN": 20,
132
+ "B-NOUN+VERB": 21,
133
+ "B-NUM": 22,
134
+ "B-NUM+NOUN": 23,
135
+ "B-PART": 24,
136
+ "B-PART+AUX": 25,
137
+ "B-PART+NOUN": 26,
138
+ "B-PART+VERB": 27,
139
+ "B-PRON": 28,
140
+ "B-PROPN": 29,
141
+ "B-PUNCT": 30,
142
+ "B-SCONJ": 31,
143
+ "B-SCONJ+ADV": 32,
144
+ "B-VERB": 33,
145
+ "B-VERB+NOUN": 34,
146
+ "B-VERB+PART": 35,
147
+ "B-VERB+SCONJ": 36,
148
+ "B-VERT": 37,
149
  "CCONJ": 38,
150
  "DET": 39,
151
  "DET+NOUN": 40,
 
164
  "I-NOUN": 53,
165
  "I-NOUN+ADP": 54,
166
  "I-NOUN+ADP+NOUN": 55,
167
+ "I-NOUN+ADP+VERB": 56,
168
+ "I-NOUN+ADV": 57,
169
+ "I-NOUN+NOUN": 58,
170
+ "I-NOUN+VERB": 59,
171
+ "I-NUM": 60,
172
+ "I-NUM+NOUN": 61,
173
+ "I-PART": 62,
174
+ "I-PART+AUX": 63,
175
+ "I-PART+NOUN": 64,
176
+ "I-PART+VERB": 65,
177
+ "I-PRON": 66,
178
+ "I-PROPN": 67,
179
+ "I-PUNCT": 68,
180
+ "I-SCONJ": 69,
181
+ "I-SCONJ+ADV": 70,
182
+ "I-VERB": 71,
183
+ "I-VERB+NOUN": 72,
184
+ "I-VERB+PART": 73,
185
+ "I-VERB+SCONJ": 74,
186
+ "I-VERT": 75,
187
  "INTJ": 76,
188
  "NOUN": 77,
189
  "NOUN+ADP": 78,
190
  "NOUN+NOUN": 79,
191
  "NOUN+VERB": 80,
192
  "NUM": 81,
193
+ "PART": 82,
194
+ "PART+VERB": 83,
195
+ "PROPN": 84,
196
+ "PUNCT": 85,
197
+ "SCONJ": 86,
198
+ "SYM": 87,
199
+ "VERB": 88,
200
+ "VERB+AUX": 89,
201
+ "VERB+NOUN": 90,
202
+ "VERB+PART": 91,
203
+ "VERB+VERB": 92,
204
+ "VERT": 93,
205
+ "X": 94
 
 
206
  },
207
  "layer_norm_eps": 1e-07,
208
  "max_position_embeddings": 512,
 
294
  "Oro",
295
  "wano"
296
  ],
297
+ "Oshmaketa": [
298
+ "Oshmake",
299
  "ta"
300
  ],
301
+ "Pet-samaketa": [
302
+ "Pet-samake",
303
  "ta"
304
  ],
305
  "Soita": [
306
  "Soi",
307
  "ta"
308
  ],
309
+ "cheppone": [
310
+ "cheppo",
311
+ "ne"
312
+ ],
313
  "keseta": [
314
  "kese",
315
  "ta"
 
326
  "oro",
327
  "wano"
328
  ],
329
+ "oshmaketa": [
330
+ "oshmake",
331
+ "ta"
332
+ ],
333
  "otta": [
334
  "ot",
335
  "ta"
 
338
  "samake",
339
  "ta"
340
  ],
 
 
 
 
341
  "soyta": [
342
  "soy",
343
  "ta"
 
363
  "puray"
364
  ]
365
  },
366
+ "NOUN+ADP+VERB": {
367
+ "soytaarpa": [
368
+ "soy",
369
+ "ta",
370
+ "arpa"
371
+ ]
372
+ },
373
  "NOUN+ADV": {
374
  "Tambeta ne": [
375
  "Tambe",
 
486
  "to"
487
  ]
488
  },
489
+ "PART+AUX": {
490
+ "chine": [
491
+ "chi",
492
+ "ne"
 
 
 
 
 
 
493
  ]
494
  },
495
  "PART+NOUN": {
496
  "=anpe": [
497
  "=an",
498
  "pe"
 
 
 
 
499
  ]
500
  },
501
  "PART+VERB": {
 
 
 
 
502
  "ainu-wap": [
503
  "a",
504
  "inu-wap"
 
522
  "karapa": [
523
  "k",
524
  "arapa"
 
 
 
 
525
  ]
526
  },
527
  "SCONJ+ADV": {
 
531
  ]
532
  },
533
  "VERB+AUX": {
 
 
 
 
 
 
 
 
534
  "sattek": [
535
  "sat",
536
  "tek"
 
549
  "an",
550
  "pe"
551
  ],
552
+ "anto": [
553
+ "an",
554
+ "to"
 
 
 
 
555
  ],
556
  "h\u00e9sep\u00e1ha": [
557
  "h\u00e9se",
 
561
  "kar",
562
  "i"
563
  ],
564
+ "ponchise": [
565
+ "pon",
566
+ "chise"
567
  ],
568
  "wenpuri": [
569
  "wen",
 
575
  "kar",
576
  "i"
577
  ],
578
+ "rokash": [
579
+ "rok",
580
+ "ash"
581
+ ],
582
  "sapash": [
583
  "sap",
584
  "ash"
585
+ ],
586
+ "shinotash": [
587
+ "shinot",
588
+ "ash"
589
  ]
590
  },
591
  "VERB+SCONJ": {
 
614
  "torch_dtype": "float32",
615
  "transformers_version": "4.22.1",
616
  "type_vocab_size": 0,
617
+ "vocab_size": 5092
618
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d16d2e33c010db9c1e11f8c3958ba24e6ebd0fff465cdceccb0133af4c5f21b9
3
- size 416098451
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:130950825f157a277a247ff50be3c172941fa762e5f44a9a209b404021e9ac08
3
+ size 416089171
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12d7290b028466f77fd2c23367731a530dc9d0b146977511d94f39e3aae9a543
3
- size 461042443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:babbe8b36455e1b5441261b62b7bbc48da3082d0d0980788b0244bddc0f6a04b
3
+ size 461045771
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff