KoichiYasuoka committed on
Commit
6f50a3e
1 Parent(s): a5aa90a

model improved

Browse files
Files changed (4) hide show
  1. config.json +152 -165
  2. pytorch_model.bin +2 -2
  3. supar.model +2 -2
  4. tokenizer.json +0 -0
config.json CHANGED
@@ -21,27 +21,27 @@
21
  "9": "B-CCONJ",
22
  "10": "B-DET",
23
  "11": "B-DET+NOUN",
24
- "12": "B-DET+VERB",
25
- "13": "B-INFR.EV",
26
- "14": "B-INTJ",
27
- "15": "B-NOUN",
28
- "16": "B-NOUN+ADP",
29
- "17": "B-NOUN+ADP+NOUN",
30
  "18": "B-NOUN+ADV",
31
  "19": "B-NOUN+NOUN",
32
  "20": "B-NOUN+VERB",
33
  "21": "B-NUM",
34
  "22": "B-NUM+NOUN",
35
  "23": "B-PART",
36
- "24": "B-PART+NOUN",
37
- "25": "B-PART+VERB",
38
- "26": "B-PRON",
39
- "27": "B-PROPN",
40
- "28": "B-PUNCT",
41
- "29": "B-SCONJ",
42
- "30": "B-SCONJ+ADV",
43
- "31": "B-VERB",
44
- "32": "B-VERB+AUX",
45
  "33": "B-VERB+NOUN",
46
  "34": "B-VERB+PART",
47
  "35": "B-VERB+SCONJ",
@@ -59,27 +59,27 @@
59
  "47": "I-CCONJ",
60
  "48": "I-DET",
61
  "49": "I-DET+NOUN",
62
- "50": "I-DET+VERB",
63
- "51": "I-INFR.EV",
64
- "52": "I-INTJ",
65
- "53": "I-NOUN",
66
- "54": "I-NOUN+ADP",
67
- "55": "I-NOUN+ADP+NOUN",
68
  "56": "I-NOUN+ADV",
69
  "57": "I-NOUN+NOUN",
70
  "58": "I-NOUN+VERB",
71
  "59": "I-NUM",
72
  "60": "I-NUM+NOUN",
73
  "61": "I-PART",
74
- "62": "I-PART+NOUN",
75
- "63": "I-PART+VERB",
76
- "64": "I-PRON",
77
- "65": "I-PROPN",
78
- "66": "I-PUNCT",
79
- "67": "I-SCONJ",
80
- "68": "I-SCONJ+ADV",
81
- "69": "I-VERB",
82
- "70": "I-VERB+AUX",
83
  "71": "I-VERB+NOUN",
84
  "72": "I-VERB+PART",
85
  "73": "I-VERB+SCONJ",
@@ -91,21 +91,19 @@
91
  "79": "NOUN+NOUN",
92
  "80": "NOUN+VERB",
93
  "81": "NUM",
94
- "82": "NUM+VERB+NOUN",
95
- "83": "PART",
96
- "84": "PART+NOUN",
97
- "85": "PART+VERB",
98
- "86": "PROPN",
99
- "87": "PUNCT",
100
- "88": "SCONJ",
101
- "89": "SYM",
102
- "90": "VERB",
103
- "91": "VERB+AUX",
104
- "92": "VERB+NOUN",
105
- "93": "VERB+PART",
106
- "94": "VERB+VERB",
107
- "95": "VERT",
108
- "96": "X"
109
  },
110
  "initializer_range": 0.02,
111
  "intermediate_size": 3072,
@@ -122,27 +120,27 @@
122
  "B-CCONJ": 9,
123
  "B-DET": 10,
124
  "B-DET+NOUN": 11,
125
- "B-DET+VERB": 12,
126
- "B-INFR.EV": 13,
127
- "B-INTJ": 14,
128
- "B-NOUN": 15,
129
- "B-NOUN+ADP": 16,
130
- "B-NOUN+ADP+NOUN": 17,
131
  "B-NOUN+ADV": 18,
132
  "B-NOUN+NOUN": 19,
133
  "B-NOUN+VERB": 20,
134
  "B-NUM": 21,
135
  "B-NUM+NOUN": 22,
136
  "B-PART": 23,
137
- "B-PART+NOUN": 24,
138
- "B-PART+VERB": 25,
139
- "B-PRON": 26,
140
- "B-PROPN": 27,
141
- "B-PUNCT": 28,
142
- "B-SCONJ": 29,
143
- "B-SCONJ+ADV": 30,
144
- "B-VERB": 31,
145
- "B-VERB+AUX": 32,
146
  "B-VERB+NOUN": 33,
147
  "B-VERB+PART": 34,
148
  "B-VERB+SCONJ": 35,
@@ -160,27 +158,27 @@
160
  "I-CCONJ": 47,
161
  "I-DET": 48,
162
  "I-DET+NOUN": 49,
163
- "I-DET+VERB": 50,
164
- "I-INFR.EV": 51,
165
- "I-INTJ": 52,
166
- "I-NOUN": 53,
167
- "I-NOUN+ADP": 54,
168
- "I-NOUN+ADP+NOUN": 55,
169
  "I-NOUN+ADV": 56,
170
  "I-NOUN+NOUN": 57,
171
  "I-NOUN+VERB": 58,
172
  "I-NUM": 59,
173
  "I-NUM+NOUN": 60,
174
  "I-PART": 61,
175
- "I-PART+NOUN": 62,
176
- "I-PART+VERB": 63,
177
- "I-PRON": 64,
178
- "I-PROPN": 65,
179
- "I-PUNCT": 66,
180
- "I-SCONJ": 67,
181
- "I-SCONJ+ADV": 68,
182
- "I-VERB": 69,
183
- "I-VERB+AUX": 70,
184
  "I-VERB+NOUN": 71,
185
  "I-VERB+PART": 72,
186
  "I-VERB+SCONJ": 73,
@@ -192,21 +190,19 @@
192
  "NOUN+NOUN": 79,
193
  "NOUN+VERB": 80,
194
  "NUM": 81,
195
- "NUM+VERB+NOUN": 82,
196
- "PART": 83,
197
- "PART+NOUN": 84,
198
- "PART+VERB": 85,
199
- "PROPN": 86,
200
- "PUNCT": 87,
201
- "SCONJ": 88,
202
- "SYM": 89,
203
- "VERB": 90,
204
- "VERB+AUX": 91,
205
- "VERB+NOUN": 92,
206
- "VERB+PART": 93,
207
- "VERB+VERB": 94,
208
- "VERT": 95,
209
- "X": 96
210
  },
211
  "layer_norm_eps": 1e-07,
212
  "max_position_embeddings": 512,
@@ -258,13 +254,9 @@
258
  "Tam",
259
  "pa"
260
  ],
261
- "oararke": [
262
- "oar",
263
- "arke"
264
- ],
265
- "oararkehe": [
266
- "oar",
267
- "arkehe"
268
  ],
269
  "tanto": [
270
  "tan",
@@ -283,41 +275,55 @@
283
  "an"
284
  ]
285
  },
286
- "DET+VERB": {
287
- "iyorun": [
288
- "iyor",
289
- "un"
290
- ]
291
- },
292
  "NOUN+ADP": {
293
  "Kunneiwano": [
294
  "Kunnei",
295
  "wano"
296
  ],
 
 
 
 
297
  "Orowano": [
298
  "Oro",
299
  "wano"
300
  ],
301
- "Pet-samaketa": [
302
- "Pet-samake",
303
  "ta"
304
  ],
305
- "Shoita": [
306
- "Shoi",
307
  "ta"
308
  ],
309
  "Soita": [
310
  "Soi",
311
  "ta"
312
  ],
 
 
 
 
313
  "keseta": [
314
  "kese",
315
  "ta"
316
  ],
 
 
 
 
317
  "kunneywano": [
318
  "kunney",
319
  "wano"
320
  ],
 
 
 
 
 
 
 
 
321
  "orowa": [
322
  "oro",
323
  "wa"
@@ -326,16 +332,20 @@
326
  "oro",
327
  "wano"
328
  ],
 
 
 
 
329
  "otta": [
330
  "ot",
331
  "ta"
332
  ],
333
- "samaketa": [
334
- "samake",
335
  "ta"
336
  ],
337
- "shoita": [
338
- "shoi",
339
  "ta"
340
  ],
341
  "soyta": [
@@ -345,10 +355,6 @@
345
  "tomta": [
346
  "tom",
347
  "ta"
348
- ],
349
- "tumukeheta": [
350
- "tumukehe",
351
- "ta"
352
  ]
353
  },
354
  "NOUN+ADP+NOUN": {
@@ -363,6 +369,13 @@
363
  "puray"
364
  ]
365
  },
 
 
 
 
 
 
 
366
  "NOUN+ADV": {
367
  "Tambeta ne": [
368
  "Tambe",
@@ -370,22 +383,18 @@
370
  ]
371
  },
372
  "NOUN+NOUN": {
373
- "Hinakoro": [
374
- "Hinak",
375
- "oro"
376
- ],
377
  "Petetoko": [
378
  "Pet",
379
  "etoko"
380
  ],
 
 
 
 
381
  "hekattar": [
382
  "hekat",
383
  "tar"
384
  ],
385
- "hinakoro": [
386
- "hinak",
387
- "oro"
388
- ],
389
  "inaanpe": [
390
  "inaan",
391
  "pe"
@@ -406,6 +415,10 @@
406
  "kamuy",
407
  "nis"
408
  ],
 
 
 
 
409
  "petetok": [
410
  "pet",
411
  "etok"
@@ -413,6 +426,10 @@
413
  "petetoko": [
414
  "pet",
415
  "etoko"
 
 
 
 
416
  ]
417
  },
418
  "NOUN+VERB": {
@@ -479,33 +496,19 @@
479
  "to"
480
  ]
481
  },
482
- "NUM+VERB+NOUN": {
483
- "Shineanto": [
484
- "Shine",
485
- "an",
486
- "to"
487
- ],
488
- "sineanto": [
489
- "sine",
490
- "an",
491
- "to"
492
  ]
493
  },
494
  "PART+NOUN": {
495
  "=anpe": [
496
  "=an",
497
  "pe"
498
- ],
499
- "shichorpok": [
500
- "shi",
501
- "chorpok"
502
  ]
503
  },
504
  "PART+VERB": {
505
- "Chirushka": [
506
- "Chi",
507
- "rushka"
508
- ],
509
  "ainu-wap": [
510
  "a",
511
  "inu-wap"
@@ -529,10 +532,6 @@
529
  "karapa": [
530
  "k",
531
  "arapa"
532
- ],
533
- "shiokote": [
534
- "shi",
535
- "okote"
536
  ]
537
  },
538
  "SCONJ+ADV": {
@@ -542,14 +541,6 @@
542
  ]
543
  },
544
  "VERB+AUX": {
545
- "poppeta ashinnangoro": [
546
- "poppeta ashin",
547
- "nangoro"
548
- ],
549
- "poppetaasinnankor": [
550
- "poppetaasin",
551
- "nankor"
552
- ],
553
  "sattek": [
554
  "sat",
555
  "tek"
@@ -568,13 +559,9 @@
568
  "an",
569
  "pe"
570
  ],
571
- "ashbe": [
572
- "ash",
573
- "be"
574
- ],
575
- "aspe": [
576
- "as",
577
- "pe"
578
  ],
579
  "h\u00e9sep\u00e1ha": [
580
  "h\u00e9se",
@@ -584,10 +571,6 @@
584
  "kar",
585
  "i"
586
  ],
587
- "ohasiri": [
588
- "oha",
589
- "siri"
590
- ],
591
  "wenpuri": [
592
  "wen",
593
  "puri"
@@ -605,6 +588,10 @@
605
  "sapash": [
606
  "sap",
607
  "ash"
 
 
 
 
608
  ]
609
  },
610
  "VERB+SCONJ": {
@@ -633,5 +620,5 @@
633
  "torch_dtype": "float32",
634
  "transformers_version": "4.22.1",
635
  "type_vocab_size": 0,
636
- "vocab_size": 5092
637
  }
 
21
  "9": "B-CCONJ",
22
  "10": "B-DET",
23
  "11": "B-DET+NOUN",
24
+ "12": "B-INFR.EV",
25
+ "13": "B-INTJ",
26
+ "14": "B-NOUN",
27
+ "15": "B-NOUN+ADP",
28
+ "16": "B-NOUN+ADP+NOUN",
29
+ "17": "B-NOUN+ADP+VERB",
30
  "18": "B-NOUN+ADV",
31
  "19": "B-NOUN+NOUN",
32
  "20": "B-NOUN+VERB",
33
  "21": "B-NUM",
34
  "22": "B-NUM+NOUN",
35
  "23": "B-PART",
36
+ "24": "B-PART+AUX",
37
+ "25": "B-PART+NOUN",
38
+ "26": "B-PART+VERB",
39
+ "27": "B-PRON",
40
+ "28": "B-PROPN",
41
+ "29": "B-PUNCT",
42
+ "30": "B-SCONJ",
43
+ "31": "B-SCONJ+ADV",
44
+ "32": "B-VERB",
45
  "33": "B-VERB+NOUN",
46
  "34": "B-VERB+PART",
47
  "35": "B-VERB+SCONJ",
 
59
  "47": "I-CCONJ",
60
  "48": "I-DET",
61
  "49": "I-DET+NOUN",
62
+ "50": "I-INFR.EV",
63
+ "51": "I-INTJ",
64
+ "52": "I-NOUN",
65
+ "53": "I-NOUN+ADP",
66
+ "54": "I-NOUN+ADP+NOUN",
67
+ "55": "I-NOUN+ADP+VERB",
68
  "56": "I-NOUN+ADV",
69
  "57": "I-NOUN+NOUN",
70
  "58": "I-NOUN+VERB",
71
  "59": "I-NUM",
72
  "60": "I-NUM+NOUN",
73
  "61": "I-PART",
74
+ "62": "I-PART+AUX",
75
+ "63": "I-PART+NOUN",
76
+ "64": "I-PART+VERB",
77
+ "65": "I-PRON",
78
+ "66": "I-PROPN",
79
+ "67": "I-PUNCT",
80
+ "68": "I-SCONJ",
81
+ "69": "I-SCONJ+ADV",
82
+ "70": "I-VERB",
83
  "71": "I-VERB+NOUN",
84
  "72": "I-VERB+PART",
85
  "73": "I-VERB+SCONJ",
 
91
  "79": "NOUN+NOUN",
92
  "80": "NOUN+VERB",
93
  "81": "NUM",
94
+ "82": "PART",
95
+ "83": "PART+VERB",
96
+ "84": "PROPN",
97
+ "85": "PUNCT",
98
+ "86": "SCONJ",
99
+ "87": "SYM",
100
+ "88": "VERB",
101
+ "89": "VERB+AUX",
102
+ "90": "VERB+NOUN",
103
+ "91": "VERB+PART",
104
+ "92": "VERB+VERB",
105
+ "93": "VERT",
106
+ "94": "X"
 
 
107
  },
108
  "initializer_range": 0.02,
109
  "intermediate_size": 3072,
 
120
  "B-CCONJ": 9,
121
  "B-DET": 10,
122
  "B-DET+NOUN": 11,
123
+ "B-INFR.EV": 12,
124
+ "B-INTJ": 13,
125
+ "B-NOUN": 14,
126
+ "B-NOUN+ADP": 15,
127
+ "B-NOUN+ADP+NOUN": 16,
128
+ "B-NOUN+ADP+VERB": 17,
129
  "B-NOUN+ADV": 18,
130
  "B-NOUN+NOUN": 19,
131
  "B-NOUN+VERB": 20,
132
  "B-NUM": 21,
133
  "B-NUM+NOUN": 22,
134
  "B-PART": 23,
135
+ "B-PART+AUX": 24,
136
+ "B-PART+NOUN": 25,
137
+ "B-PART+VERB": 26,
138
+ "B-PRON": 27,
139
+ "B-PROPN": 28,
140
+ "B-PUNCT": 29,
141
+ "B-SCONJ": 30,
142
+ "B-SCONJ+ADV": 31,
143
+ "B-VERB": 32,
144
  "B-VERB+NOUN": 33,
145
  "B-VERB+PART": 34,
146
  "B-VERB+SCONJ": 35,
 
158
  "I-CCONJ": 47,
159
  "I-DET": 48,
160
  "I-DET+NOUN": 49,
161
+ "I-INFR.EV": 50,
162
+ "I-INTJ": 51,
163
+ "I-NOUN": 52,
164
+ "I-NOUN+ADP": 53,
165
+ "I-NOUN+ADP+NOUN": 54,
166
+ "I-NOUN+ADP+VERB": 55,
167
  "I-NOUN+ADV": 56,
168
  "I-NOUN+NOUN": 57,
169
  "I-NOUN+VERB": 58,
170
  "I-NUM": 59,
171
  "I-NUM+NOUN": 60,
172
  "I-PART": 61,
173
+ "I-PART+AUX": 62,
174
+ "I-PART+NOUN": 63,
175
+ "I-PART+VERB": 64,
176
+ "I-PRON": 65,
177
+ "I-PROPN": 66,
178
+ "I-PUNCT": 67,
179
+ "I-SCONJ": 68,
180
+ "I-SCONJ+ADV": 69,
181
+ "I-VERB": 70,
182
  "I-VERB+NOUN": 71,
183
  "I-VERB+PART": 72,
184
  "I-VERB+SCONJ": 73,
 
190
  "NOUN+NOUN": 79,
191
  "NOUN+VERB": 80,
192
  "NUM": 81,
193
+ "PART": 82,
194
+ "PART+VERB": 83,
195
+ "PROPN": 84,
196
+ "PUNCT": 85,
197
+ "SCONJ": 86,
198
+ "SYM": 87,
199
+ "VERB": 88,
200
+ "VERB+AUX": 89,
201
+ "VERB+NOUN": 90,
202
+ "VERB+PART": 91,
203
+ "VERB+VERB": 92,
204
+ "VERT": 93,
205
+ "X": 94
 
 
206
  },
207
  "layer_norm_eps": 1e-07,
208
  "max_position_embeddings": 512,
 
254
  "Tam",
255
  "pa"
256
  ],
257
+ "tanpa": [
258
+ "tan",
259
+ "pa"
 
 
 
 
260
  ],
261
  "tanto": [
262
  "tan",
 
275
  "an"
276
  ]
277
  },
 
 
 
 
 
 
278
  "NOUN+ADP": {
279
  "Kunneiwano": [
280
  "Kunnei",
281
  "wano"
282
  ],
283
+ "Orota": [
284
+ "Oro",
285
+ "ta"
286
+ ],
287
  "Orowano": [
288
  "Oro",
289
  "wano"
290
  ],
291
+ "Oshmaketa": [
292
+ "Oshmake",
293
  "ta"
294
  ],
295
+ "Pet-samaketa": [
296
+ "Pet-samake",
297
  "ta"
298
  ],
299
  "Soita": [
300
  "Soi",
301
  "ta"
302
  ],
303
+ "cheppone": [
304
+ "cheppo",
305
+ "ne"
306
+ ],
307
  "keseta": [
308
  "kese",
309
  "ta"
310
  ],
311
+ "kesta": [
312
+ "kes",
313
+ "ta"
314
+ ],
315
  "kunneywano": [
316
  "kunney",
317
  "wano"
318
  ],
319
+ "neyta": [
320
+ "ney",
321
+ "ta"
322
+ ],
323
+ "orota": [
324
+ "oro",
325
+ "ta"
326
+ ],
327
  "orowa": [
328
  "oro",
329
  "wa"
 
332
  "oro",
333
  "wano"
334
  ],
335
+ "oshmaketa": [
336
+ "oshmake",
337
+ "ta"
338
+ ],
339
  "otta": [
340
  "ot",
341
  "ta"
342
  ],
343
+ "petsamaketa": [
344
+ "petsamake",
345
  "ta"
346
  ],
347
+ "samaketa": [
348
+ "samake",
349
  "ta"
350
  ],
351
  "soyta": [
 
355
  "tomta": [
356
  "tom",
357
  "ta"
 
 
 
 
358
  ]
359
  },
360
  "NOUN+ADP+NOUN": {
 
369
  "puray"
370
  ]
371
  },
372
+ "NOUN+ADP+VERB": {
373
+ "soytaarpa": [
374
+ "soy",
375
+ "ta",
376
+ "arpa"
377
+ ]
378
+ },
379
  "NOUN+ADV": {
380
  "Tambeta ne": [
381
  "Tambe",
 
383
  ]
384
  },
385
  "NOUN+NOUN": {
 
 
 
 
386
  "Petetoko": [
387
  "Pet",
388
  "etoko"
389
  ],
390
+ "Shirokanipe": [
391
+ "Shirokani",
392
+ "pe"
393
+ ],
394
  "hekattar": [
395
  "hekat",
396
  "tar"
397
  ],
 
 
 
 
398
  "inaanpe": [
399
  "inaan",
400
  "pe"
 
415
  "kamuy",
416
  "nis"
417
  ],
418
+ "konkanipe": [
419
+ "konkani",
420
+ "pe"
421
+ ],
422
  "petetok": [
423
  "pet",
424
  "etok"
 
426
  "petetoko": [
427
  "pet",
428
  "etoko"
429
+ ],
430
+ "sirokanipe": [
431
+ "sirokani",
432
+ "pe"
433
  ]
434
  },
435
  "NOUN+VERB": {
 
496
  "to"
497
  ]
498
  },
499
+ "PART+AUX": {
500
+ "chine": [
501
+ "chi",
502
+ "ne"
 
 
 
 
 
 
503
  ]
504
  },
505
  "PART+NOUN": {
506
  "=anpe": [
507
  "=an",
508
  "pe"
 
 
 
 
509
  ]
510
  },
511
  "PART+VERB": {
 
 
 
 
512
  "ainu-wap": [
513
  "a",
514
  "inu-wap"
 
532
  "karapa": [
533
  "k",
534
  "arapa"
 
 
 
 
535
  ]
536
  },
537
  "SCONJ+ADV": {
 
541
  ]
542
  },
543
  "VERB+AUX": {
 
 
 
 
 
 
 
 
544
  "sattek": [
545
  "sat",
546
  "tek"
 
559
  "an",
560
  "pe"
561
  ],
562
+ "anto": [
563
+ "an",
564
+ "to"
 
 
 
 
565
  ],
566
  "h\u00e9sep\u00e1ha": [
567
  "h\u00e9se",
 
571
  "kar",
572
  "i"
573
  ],
 
 
 
 
574
  "wenpuri": [
575
  "wen",
576
  "puri"
 
588
  "sapash": [
589
  "sap",
590
  "ash"
591
+ ],
592
+ "shinotash": [
593
+ "shinot",
594
+ "ash"
595
  ]
596
  },
597
  "VERB+SCONJ": {
 
620
  "torch_dtype": "float32",
621
  "transformers_version": "4.22.1",
622
  "type_vocab_size": 0,
623
+ "vocab_size": 6143
624
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:540dae77de84baec491ec433af635a0a345f9ceb4a7c0fe9845948fa5181efbf
3
- size 416095379
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b288c1d7f545b97c411dedfe9ad50d32805f0d3ac04e74b9e9059010a5cf0fe
3
+ size 419317843
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af6a91919dc6e9ba6390a1933911dd5ce8c8c44d9f84e56a26e21cd7e8f84cc
3
- size 461045771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cef6c2b683eee67b07578529bbb3849ede3b772becc7e15e271f2d26795a4bf0
3
+ size 464297035
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff