KoichiYasuoka committed on
Commit
dfb589d
1 Parent(s): d264b5c

initial release

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - "ain"
4
+ tags:
5
+ - "ainu"
6
+ - "token-classification"
7
+ - "pos"
8
+ - "dependency-parsing"
9
+ license: "cc-by-sa-4.0"
10
+ pipeline_tag: "token-classification"
11
+ ---
12
+
13
+ # deberta-base-ainu-upos
14
+
15
+ ## Model Description
16
+
17
+ This is a DeBERTa(V2) model pre-trained on Ainu texts (both カタカナ and romanized) for POS-tagging and dependency-parsing, derived from [deberta-base-ainu](https://huggingface.co/KoichiYasuoka/deberta-base-ainu). Every word is tagged with its [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) label.
18
+
19
+ ## How to Use
20
+
21
+ ```py
22
+ from transformers import AutoTokenizer,AutoModelForTokenClassification
23
+ tokenizer=AutoTokenizer.from_pretrained("KoichiYasuoka/deberta-base-ainu-upos")
24
+ model=AutoModelForTokenClassification.from_pretrained("KoichiYasuoka/deberta-base-ainu-upos")
25
+ ```
26
+
27
+ or
28
+
29
+ ```py
30
+ import esupar
31
+ nlp=esupar.load("KoichiYasuoka/deberta-base-ainu-upos")
32
+ ```
33
+
34
+ ## See Also
35
+
36
+ [esupar](https://github.com/KoichiYasuoka/esupar): Tokenizer, POS-tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models
37
+
config.json ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "ADP",
13
+ "1": "ADV",
14
+ "2": "AUX",
15
+ "3": "AUX+NOUN",
16
+ "4": "B-ADP",
17
+ "5": "B-ADP+VERB+NOUN",
18
+ "6": "B-ADV",
19
+ "7": "B-AUX",
20
+ "8": "B-AUX+PART",
21
+ "9": "B-CCONJ",
22
+ "10": "B-DET",
23
+ "11": "B-DET+NOUN",
24
+ "12": "B-DET+VERB",
25
+ "13": "B-INFR.EV",
26
+ "14": "B-INTJ",
27
+ "15": "B-NOUN",
28
+ "16": "B-NOUN+ADP",
29
+ "17": "B-NOUN+ADP+NOUN",
30
+ "18": "B-NOUN+ADV",
31
+ "19": "B-NOUN+NOUN",
32
+ "20": "B-NOUN+VERB",
33
+ "21": "B-NUM",
34
+ "22": "B-NUM+NOUN",
35
+ "23": "B-PART",
36
+ "24": "B-PART+NOUN",
37
+ "25": "B-PART+VERB",
38
+ "26": "B-PRON",
39
+ "27": "B-PROPN",
40
+ "28": "B-PUNCT",
41
+ "29": "B-SCONJ",
42
+ "30": "B-SCONJ+ADV",
43
+ "31": "B-VERB",
44
+ "32": "B-VERB+AUX",
45
+ "33": "B-VERB+NOUN",
46
+ "34": "B-VERB+PART",
47
+ "35": "B-VERB+SCONJ",
48
+ "36": "B-VERT",
49
+ "37": "B-X",
50
+ "38": "CCONJ",
51
+ "39": "DET",
52
+ "40": "DET+NOUN",
53
+ "41": "DET+SCONJ+VERB",
54
+ "42": "I-ADP",
55
+ "43": "I-ADP+VERB+NOUN",
56
+ "44": "I-ADV",
57
+ "45": "I-AUX",
58
+ "46": "I-AUX+PART",
59
+ "47": "I-CCONJ",
60
+ "48": "I-DET",
61
+ "49": "I-DET+NOUN",
62
+ "50": "I-DET+VERB",
63
+ "51": "I-INFR.EV",
64
+ "52": "I-INTJ",
65
+ "53": "I-NOUN",
66
+ "54": "I-NOUN+ADP",
67
+ "55": "I-NOUN+ADP+NOUN",
68
+ "56": "I-NOUN+ADV",
69
+ "57": "I-NOUN+NOUN",
70
+ "58": "I-NOUN+VERB",
71
+ "59": "I-NUM",
72
+ "60": "I-NUM+NOUN",
73
+ "61": "I-PART",
74
+ "62": "I-PART+NOUN",
75
+ "63": "I-PART+VERB",
76
+ "64": "I-PRON",
77
+ "65": "I-PROPN",
78
+ "66": "I-PUNCT",
79
+ "67": "I-SCONJ",
80
+ "68": "I-SCONJ+ADV",
81
+ "69": "I-VERB",
82
+ "70": "I-VERB+AUX",
83
+ "71": "I-VERB+NOUN",
84
+ "72": "I-VERB+PART",
85
+ "73": "I-VERB+SCONJ",
86
+ "74": "I-VERT",
87
+ "75": "I-X",
88
+ "76": "INTJ",
89
+ "77": "NOUN",
90
+ "78": "NOUN+ADP",
91
+ "79": "NOUN+NOUN",
92
+ "80": "NOUN+VERB",
93
+ "81": "NUM",
94
+ "82": "NUM+VERB+NOUN",
95
+ "83": "PART",
96
+ "84": "PART+NOUN",
97
+ "85": "PART+VERB",
98
+ "86": "PROPN",
99
+ "87": "PUNCT",
100
+ "88": "SCONJ",
101
+ "89": "SYM",
102
+ "90": "VERB",
103
+ "91": "VERB+AUX",
104
+ "92": "VERB+NOUN",
105
+ "93": "VERB+PART",
106
+ "94": "VERB+VERB",
107
+ "95": "VERT",
108
+ "96": "X"
109
+ },
110
+ "initializer_range": 0.02,
111
+ "intermediate_size": 3072,
112
+ "label2id": {
113
+ "ADP": 0,
114
+ "ADV": 1,
115
+ "AUX": 2,
116
+ "AUX+NOUN": 3,
117
+ "B-ADP": 4,
118
+ "B-ADP+VERB+NOUN": 5,
119
+ "B-ADV": 6,
120
+ "B-AUX": 7,
121
+ "B-AUX+PART": 8,
122
+ "B-CCONJ": 9,
123
+ "B-DET": 10,
124
+ "B-DET+NOUN": 11,
125
+ "B-DET+VERB": 12,
126
+ "B-INFR.EV": 13,
127
+ "B-INTJ": 14,
128
+ "B-NOUN": 15,
129
+ "B-NOUN+ADP": 16,
130
+ "B-NOUN+ADP+NOUN": 17,
131
+ "B-NOUN+ADV": 18,
132
+ "B-NOUN+NOUN": 19,
133
+ "B-NOUN+VERB": 20,
134
+ "B-NUM": 21,
135
+ "B-NUM+NOUN": 22,
136
+ "B-PART": 23,
137
+ "B-PART+NOUN": 24,
138
+ "B-PART+VERB": 25,
139
+ "B-PRON": 26,
140
+ "B-PROPN": 27,
141
+ "B-PUNCT": 28,
142
+ "B-SCONJ": 29,
143
+ "B-SCONJ+ADV": 30,
144
+ "B-VERB": 31,
145
+ "B-VERB+AUX": 32,
146
+ "B-VERB+NOUN": 33,
147
+ "B-VERB+PART": 34,
148
+ "B-VERB+SCONJ": 35,
149
+ "B-VERT": 36,
150
+ "B-X": 37,
151
+ "CCONJ": 38,
152
+ "DET": 39,
153
+ "DET+NOUN": 40,
154
+ "DET+SCONJ+VERB": 41,
155
+ "I-ADP": 42,
156
+ "I-ADP+VERB+NOUN": 43,
157
+ "I-ADV": 44,
158
+ "I-AUX": 45,
159
+ "I-AUX+PART": 46,
160
+ "I-CCONJ": 47,
161
+ "I-DET": 48,
162
+ "I-DET+NOUN": 49,
163
+ "I-DET+VERB": 50,
164
+ "I-INFR.EV": 51,
165
+ "I-INTJ": 52,
166
+ "I-NOUN": 53,
167
+ "I-NOUN+ADP": 54,
168
+ "I-NOUN+ADP+NOUN": 55,
169
+ "I-NOUN+ADV": 56,
170
+ "I-NOUN+NOUN": 57,
171
+ "I-NOUN+VERB": 58,
172
+ "I-NUM": 59,
173
+ "I-NUM+NOUN": 60,
174
+ "I-PART": 61,
175
+ "I-PART+NOUN": 62,
176
+ "I-PART+VERB": 63,
177
+ "I-PRON": 64,
178
+ "I-PROPN": 65,
179
+ "I-PUNCT": 66,
180
+ "I-SCONJ": 67,
181
+ "I-SCONJ+ADV": 68,
182
+ "I-VERB": 69,
183
+ "I-VERB+AUX": 70,
184
+ "I-VERB+NOUN": 71,
185
+ "I-VERB+PART": 72,
186
+ "I-VERB+SCONJ": 73,
187
+ "I-VERT": 74,
188
+ "I-X": 75,
189
+ "INTJ": 76,
190
+ "NOUN": 77,
191
+ "NOUN+ADP": 78,
192
+ "NOUN+NOUN": 79,
193
+ "NOUN+VERB": 80,
194
+ "NUM": 81,
195
+ "NUM+VERB+NOUN": 82,
196
+ "PART": 83,
197
+ "PART+NOUN": 84,
198
+ "PART+VERB": 85,
199
+ "PROPN": 86,
200
+ "PUNCT": 87,
201
+ "SCONJ": 88,
202
+ "SYM": 89,
203
+ "VERB": 90,
204
+ "VERB+AUX": 91,
205
+ "VERB+NOUN": 92,
206
+ "VERB+PART": 93,
207
+ "VERB+VERB": 94,
208
+ "VERT": 95,
209
+ "X": 96
210
+ },
211
+ "layer_norm_eps": 1e-07,
212
+ "max_position_embeddings": 512,
213
+ "max_relative_positions": -1,
214
+ "model_type": "deberta-v2",
215
+ "num_attention_heads": 12,
216
+ "num_hidden_layers": 12,
217
+ "pad_token_id": 1,
218
+ "pooler_dropout": 0,
219
+ "pooler_hidden_act": "gelu",
220
+ "pooler_hidden_size": 768,
221
+ "pos_att_type": [
222
+ "p2c",
223
+ "c2p"
224
+ ],
225
+ "position_biased_input": false,
226
+ "relative_attention": true,
227
+ "task_specific_params": {
228
+ "upos_multiword": {
229
+ "ADP+VERB+NOUN": {
230
+ "tambe": [
231
+ "ta",
232
+ "m",
233
+ "be"
234
+ ]
235
+ },
236
+ "AUX+NOUN": {
237
+ "nep": [
238
+ "ne",
239
+ "p"
240
+ ]
241
+ },
242
+ "AUX+PART": {
243
+ "nangonna": [
244
+ "nangon",
245
+ "na"
246
+ ],
247
+ "nankonna": [
248
+ "nankon",
249
+ "na"
250
+ ]
251
+ },
252
+ "DET+NOUN": {
253
+ "Tamba": [
254
+ "Tam",
255
+ "ba"
256
+ ],
257
+ "Tampa": [
258
+ "Tam",
259
+ "pa"
260
+ ],
261
+ "oararke": [
262
+ "oar",
263
+ "arke"
264
+ ],
265
+ "oararkehe": [
266
+ "oar",
267
+ "arkehe"
268
+ ],
269
+ "tanto": [
270
+ "tan",
271
+ "to"
272
+ ]
273
+ },
274
+ "DET+SCONJ+VERB": {
275
+ "Newaan": [
276
+ "Ne",
277
+ "wa",
278
+ "an"
279
+ ],
280
+ "newaan": [
281
+ "ne",
282
+ "wa",
283
+ "an"
284
+ ]
285
+ },
286
+ "DET+VERB": {
287
+ "iyorun": [
288
+ "iyor",
289
+ "un"
290
+ ]
291
+ },
292
+ "NOUN+ADP": {
293
+ "Kunneiwano": [
294
+ "Kunnei",
295
+ "wano"
296
+ ],
297
+ "Orowano": [
298
+ "Oro",
299
+ "wano"
300
+ ],
301
+ "Pet-samaketa": [
302
+ "Pet-samake",
303
+ "ta"
304
+ ],
305
+ "Shoita": [
306
+ "Shoi",
307
+ "ta"
308
+ ],
309
+ "Soita": [
310
+ "Soi",
311
+ "ta"
312
+ ],
313
+ "keseta": [
314
+ "kese",
315
+ "ta"
316
+ ],
317
+ "kunneywano": [
318
+ "kunney",
319
+ "wano"
320
+ ],
321
+ "orowa": [
322
+ "oro",
323
+ "wa"
324
+ ],
325
+ "orowano": [
326
+ "oro",
327
+ "wano"
328
+ ],
329
+ "otta": [
330
+ "ot",
331
+ "ta"
332
+ ],
333
+ "samaketa": [
334
+ "samake",
335
+ "ta"
336
+ ],
337
+ "shoita": [
338
+ "shoi",
339
+ "ta"
340
+ ],
341
+ "soyta": [
342
+ "soy",
343
+ "ta"
344
+ ],
345
+ "tomta": [
346
+ "tom",
347
+ "ta"
348
+ ],
349
+ "tumukeheta": [
350
+ "tumukehe",
351
+ "ta"
352
+ ]
353
+ },
354
+ "NOUN+ADP+NOUN": {
355
+ "rorunpurai": [
356
+ "ror",
357
+ "un",
358
+ "purai"
359
+ ],
360
+ "rorunpuray": [
361
+ "ror",
362
+ "un",
363
+ "puray"
364
+ ]
365
+ },
366
+ "NOUN+ADV": {
367
+ "Tambeta ne": [
368
+ "Tambe",
369
+ "ta ne"
370
+ ]
371
+ },
372
+ "NOUN+NOUN": {
373
+ "Hinakoro": [
374
+ "Hinak",
375
+ "oro"
376
+ ],
377
+ "Petetoko": [
378
+ "Pet",
379
+ "etoko"
380
+ ],
381
+ "hekattar": [
382
+ "hekat",
383
+ "tar"
384
+ ],
385
+ "hinakoro": [
386
+ "hinak",
387
+ "oro"
388
+ ],
389
+ "inaanpe": [
390
+ "inaan",
391
+ "pe"
392
+ ],
393
+ "inanpe": [
394
+ "inan",
395
+ "pe"
396
+ ],
397
+ "iporohoka": [
398
+ "iporoho",
399
+ "ka"
400
+ ],
401
+ "kamuinish": [
402
+ "kamui",
403
+ "nish"
404
+ ],
405
+ "kamuynis": [
406
+ "kamuy",
407
+ "nis"
408
+ ],
409
+ "petetok": [
410
+ "pet",
411
+ "etok"
412
+ ],
413
+ "petetoko": [
414
+ "pet",
415
+ "etoko"
416
+ ]
417
+ },
418
+ "NOUN+VERB": {
419
+ "Omakun": [
420
+ "Omak",
421
+ "un"
422
+ ],
423
+ "Orepun": [
424
+ "Orep",
425
+ "un"
426
+ ],
427
+ "Shiriki": [
428
+ "Shiri",
429
+ "ki"
430
+ ],
431
+ "kotankor": [
432
+ "kotan",
433
+ "kor"
434
+ ],
435
+ "makun": [
436
+ "mak",
437
+ "un"
438
+ ],
439
+ "repun": [
440
+ "rep",
441
+ "un"
442
+ ],
443
+ "rikunruke": [
444
+ "rik",
445
+ "unruke"
446
+ ],
447
+ "siriki": [
448
+ "siri",
449
+ "ki"
450
+ ],
451
+ "ukakushte": [
452
+ "uka",
453
+ "kushte"
454
+ ],
455
+ "ukakuste": [
456
+ "uka",
457
+ "kuste"
458
+ ],
459
+ "uraikik": [
460
+ "urai",
461
+ "kik"
462
+ ]
463
+ },
464
+ "NUM+NOUN": {
465
+ "Wanto": [
466
+ "Wan",
467
+ "to"
468
+ ],
469
+ "hotnepa": [
470
+ "hotne",
471
+ "pa"
472
+ ],
473
+ "wanpe": [
474
+ "wan",
475
+ "pe"
476
+ ],
477
+ "wanto": [
478
+ "wan",
479
+ "to"
480
+ ]
481
+ },
482
+ "NUM+VERB+NOUN": {
483
+ "Shineanto": [
484
+ "Shine",
485
+ "an",
486
+ "to"
487
+ ],
488
+ "sineanto": [
489
+ "sine",
490
+ "an",
491
+ "to"
492
+ ]
493
+ },
494
+ "PART+NOUN": {
495
+ "=anpe": [
496
+ "=an",
497
+ "pe"
498
+ ],
499
+ "shichorpok": [
500
+ "shi",
501
+ "chorpok"
502
+ ]
503
+ },
504
+ "PART+VERB": {
505
+ "Chirushka": [
506
+ "Chi",
507
+ "rushka"
508
+ ],
509
+ "ainu-wap": [
510
+ "a",
511
+ "inu-wap"
512
+ ],
513
+ "akus": [
514
+ "a",
515
+ "kus"
516
+ ],
517
+ "chiki": [
518
+ "chi",
519
+ "ki"
520
+ ],
521
+ "chikik": [
522
+ "chi",
523
+ "kik"
524
+ ],
525
+ "eram an": [
526
+ "e",
527
+ "ram an"
528
+ ],
529
+ "karapa": [
530
+ "k",
531
+ "arapa"
532
+ ],
533
+ "shiokote": [
534
+ "shi",
535
+ "okote"
536
+ ]
537
+ },
538
+ "SCONJ+ADV": {
539
+ "koiramno": [
540
+ "ko",
541
+ "iramno"
542
+ ]
543
+ },
544
+ "VERB+AUX": {
545
+ "poppeta ashinnangoro": [
546
+ "poppeta ashin",
547
+ "nangoro"
548
+ ],
549
+ "poppetaasinnankor": [
550
+ "poppetaasin",
551
+ "nankor"
552
+ ],
553
+ "sattek": [
554
+ "sat",
555
+ "tek"
556
+ ]
557
+ },
558
+ "VERB+NOUN": {
559
+ "Hesepa": [
560
+ "Hese",
561
+ "pa"
562
+ ],
563
+ "ambe": [
564
+ "am",
565
+ "be"
566
+ ],
567
+ "anpe": [
568
+ "an",
569
+ "pe"
570
+ ],
571
+ "ashbe": [
572
+ "ash",
573
+ "be"
574
+ ],
575
+ "aspe": [
576
+ "as",
577
+ "pe"
578
+ ],
579
+ "h\u00e9sep\u00e1ha": [
580
+ "h\u00e9se",
581
+ "p\u00e1ha"
582
+ ],
583
+ "kari": [
584
+ "kar",
585
+ "i"
586
+ ],
587
+ "ohasiri": [
588
+ "oha",
589
+ "siri"
590
+ ],
591
+ "wenpuri": [
592
+ "wen",
593
+ "puri"
594
+ ]
595
+ },
596
+ "VERB+PART": {
597
+ "kari": [
598
+ "kar",
599
+ "i"
600
+ ],
601
+ "sapash": [
602
+ "sap",
603
+ "ash"
604
+ ]
605
+ },
606
+ "VERB+SCONJ": {
607
+ "anak un": [
608
+ "an",
609
+ "ak un"
610
+ ],
611
+ "anakanakne": [
612
+ "an",
613
+ "akanakne"
614
+ ],
615
+ "sakno": [
616
+ "sak",
617
+ "no"
618
+ ]
619
+ },
620
+ "VERB+VERB": {
621
+ "ranran": [
622
+ "ran",
623
+ "ran"
624
+ ]
625
+ }
626
+ }
627
+ },
628
+ "tokenizer_class": "DebertaV2TokenizerFast",
629
+ "torch_dtype": "float32",
630
+ "transformers_version": "4.22.1",
631
+ "type_vocab_size": 0,
632
+ "vocab_size": 5093
633
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d16d2e33c010db9c1e11f8c3958ba24e6ebd0fff465cdceccb0133af4c5f21b9
3
+ size 416098451
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
3
+ size 1
supar.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d7290b028466f77fd2c23367731a530dc9d0b146977511d94f39e3aae9a543
3
+ size 461042443
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "eos_token": "[SEP]",
6
+ "keep_accents": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "split_by_punct": true,
12
+ "tokenizer_class": "DebertaV2TokenizerFast",
13
+ "unk_token": "[UNK]"
14
+ }