KoichiYasuoka committed on
Commit
e750b89
1 Parent(s): 400528c

model improved

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. config.json +143 -135
  3. pytorch_model.bin +2 -2
  4. supar.model +2 -2
README.md CHANGED
@@ -18,7 +18,7 @@ widget:
18
 
19
  ## Model Description
20
 
21
- This is a DeBERTa(V2) model pre-trained on 青空文庫 texts for POS-tagging and dependency-parsing, derived from [deberta-base-japanese-unidic](https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic). Every long-unit-word is tagged by [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech).
22
 
23
  ## How to Use
24
 
 
18
 
19
  ## Model Description
20
 
21
+ This is a DeBERTa(V2) model pre-trained on 青空文庫 texts for POS-tagging and dependency-parsing, derived from [deberta-base-japanese-unidic](https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic). Every long-unit-word is tagged by [UPOS](https://universaldependencies.org/u/pos/) (Universal Part-Of-Speech) and [FEATS](https://universaldependencies.org/u/feat/).
22
 
23
  ## How to Use
24
 
config.json CHANGED
@@ -17,73 +17,77 @@
17
  "5": "ADP+VERB",
18
  "6": "ADV",
19
  "7": "AUX",
20
- "8": "B-ADJ",
21
- "9": "B-ADJ+VERB",
22
- "10": "B-ADP",
23
- "11": "B-ADP+ADJ",
24
- "12": "B-ADP+NOUN+ADP",
25
- "13": "B-ADV",
26
- "14": "B-AUX",
27
- "15": "B-AUX+AUX",
28
- "16": "B-AUX+NOUN",
29
- "17": "B-CCONJ",
30
- "18": "B-INTJ",
31
- "19": "B-NOUN",
32
- "20": "B-NOUN+ADP",
33
- "21": "B-NOUN+NOUN",
34
- "22": "B-NUM",
35
- "23": "B-PART",
36
- "24": "B-PRON",
37
- "25": "B-PROPN",
38
- "26": "B-PROPN+ADP",
39
- "27": "B-PUNCT",
40
- "28": "B-SCONJ",
41
- "29": "B-SYM",
42
- "30": "B-VERB",
43
- "31": "B-VERB+AUX",
44
- "32": "B-VERB+SCONJ",
45
- "33": "B-X",
46
- "34": "CCONJ",
47
- "35": "DET",
48
- "36": "DET+NOUN",
49
- "37": "I-ADJ",
50
- "38": "I-ADJ+VERB",
51
- "39": "I-ADP",
52
- "40": "I-ADP+ADJ",
53
- "41": "I-ADP+NOUN+ADP",
54
- "42": "I-ADV",
55
- "43": "I-AUX",
56
- "44": "I-AUX+AUX",
57
- "45": "I-AUX+NOUN",
58
- "46": "I-CCONJ",
59
- "47": "I-INTJ",
60
- "48": "I-NOUN",
61
- "49": "I-NOUN+ADP",
62
- "50": "I-NOUN+NOUN",
63
- "51": "I-NUM",
64
- "52": "I-PART",
65
- "53": "I-PRON",
66
- "54": "I-PROPN",
67
- "55": "I-PROPN+ADP",
68
- "56": "I-PUNCT",
69
- "57": "I-SCONJ",
70
- "58": "I-SYM",
71
- "59": "I-VERB",
72
- "60": "I-VERB+AUX",
73
- "61": "I-VERB+SCONJ",
74
- "62": "I-X",
75
- "63": "INTJ",
76
- "64": "NOUN",
77
- "65": "NUM",
78
- "66": "PART",
79
- "67": "PRON",
80
- "68": "PROPN",
81
- "69": "PUNCT",
82
- "70": "SCONJ",
83
- "71": "SYM",
84
- "72": "VERB",
85
- "73": "VERB+AUX",
86
- "74": "X"
 
 
 
 
87
  },
88
  "initializer_range": 0.02,
89
  "intermediate_size": 3072,
@@ -96,73 +100,77 @@
96
  "ADP+VERB": 5,
97
  "ADV": 6,
98
  "AUX": 7,
99
- "B-ADJ": 8,
100
- "B-ADJ+VERB": 9,
101
- "B-ADP": 10,
102
- "B-ADP+ADJ": 11,
103
- "B-ADP+NOUN+ADP": 12,
104
- "B-ADV": 13,
105
- "B-AUX": 14,
106
- "B-AUX+AUX": 15,
107
- "B-AUX+NOUN": 16,
108
- "B-CCONJ": 17,
109
- "B-INTJ": 18,
110
- "B-NOUN": 19,
111
- "B-NOUN+ADP": 20,
112
- "B-NOUN+NOUN": 21,
113
- "B-NUM": 22,
114
- "B-PART": 23,
115
- "B-PRON": 24,
116
- "B-PROPN": 25,
117
- "B-PROPN+ADP": 26,
118
- "B-PUNCT": 27,
119
- "B-SCONJ": 28,
120
- "B-SYM": 29,
121
- "B-VERB": 30,
122
- "B-VERB+AUX": 31,
123
- "B-VERB+SCONJ": 32,
124
- "B-X": 33,
125
- "CCONJ": 34,
126
- "DET": 35,
127
- "DET+NOUN": 36,
128
- "I-ADJ": 37,
129
- "I-ADJ+VERB": 38,
130
- "I-ADP": 39,
131
- "I-ADP+ADJ": 40,
132
- "I-ADP+NOUN+ADP": 41,
133
- "I-ADV": 42,
134
- "I-AUX": 43,
135
- "I-AUX+AUX": 44,
136
- "I-AUX+NOUN": 45,
137
- "I-CCONJ": 46,
138
- "I-INTJ": 47,
139
- "I-NOUN": 48,
140
- "I-NOUN+ADP": 49,
141
- "I-NOUN+NOUN": 50,
142
- "I-NUM": 51,
143
- "I-PART": 52,
144
- "I-PRON": 53,
145
- "I-PROPN": 54,
146
- "I-PROPN+ADP": 55,
147
- "I-PUNCT": 56,
148
- "I-SCONJ": 57,
149
- "I-SYM": 58,
150
- "I-VERB": 59,
151
- "I-VERB+AUX": 60,
152
- "I-VERB+SCONJ": 61,
153
- "I-X": 62,
154
- "INTJ": 63,
155
- "NOUN": 64,
156
- "NUM": 65,
157
- "PART": 66,
158
- "PRON": 67,
159
- "PROPN": 68,
160
- "PUNCT": 69,
161
- "SCONJ": 70,
162
- "SYM": 71,
163
- "VERB": 72,
164
- "VERB+AUX": 73,
165
- "X": 74
 
 
 
 
166
  },
167
  "layer_norm_eps": 1e-07,
168
  "max_position_embeddings": 512,
@@ -293,7 +301,7 @@
293
  },
294
  "tokenizer_class": "BertJapaneseTokenizer",
295
  "torch_dtype": "float32",
296
- "transformers_version": "4.19.2",
297
  "type_vocab_size": 0,
298
  "vocab_size": 32000
299
  }
 
17
  "5": "ADP+VERB",
18
  "6": "ADV",
19
  "7": "AUX",
20
+ "8": "AUX|Polarity=Neg",
21
+ "9": "B-ADJ",
22
+ "10": "B-ADJ+VERB",
23
+ "11": "B-ADP",
24
+ "12": "B-ADP+ADJ",
25
+ "13": "B-ADP+NOUN+ADP",
26
+ "14": "B-ADV",
27
+ "15": "B-AUX",
28
+ "16": "B-AUX+AUX",
29
+ "17": "B-AUX+NOUN",
30
+ "18": "B-AUX|Polarity=Neg",
31
+ "19": "B-CCONJ",
32
+ "20": "B-INTJ",
33
+ "21": "B-NOUN",
34
+ "22": "B-NOUN+ADP",
35
+ "23": "B-NOUN+NOUN",
36
+ "24": "B-NUM",
37
+ "25": "B-PART",
38
+ "26": "B-PRON",
39
+ "27": "B-PROPN",
40
+ "28": "B-PROPN+ADP",
41
+ "29": "B-PUNCT",
42
+ "30": "B-SCONJ",
43
+ "31": "B-SYM",
44
+ "32": "B-VERB",
45
+ "33": "B-VERB+AUX",
46
+ "34": "B-VERB+SCONJ",
47
+ "35": "B-X",
48
+ "36": "CCONJ",
49
+ "37": "DET",
50
+ "38": "DET+NOUN",
51
+ "39": "I-ADJ",
52
+ "40": "I-ADJ+VERB",
53
+ "41": "I-ADP",
54
+ "42": "I-ADP+ADJ",
55
+ "43": "I-ADP+NOUN+ADP",
56
+ "44": "I-ADV",
57
+ "45": "I-AUX",
58
+ "46": "I-AUX+AUX",
59
+ "47": "I-AUX+NOUN",
60
+ "48": "I-AUX|Polarity=Neg",
61
+ "49": "I-CCONJ",
62
+ "50": "I-INTJ",
63
+ "51": "I-NOUN",
64
+ "52": "I-NOUN+ADP",
65
+ "53": "I-NOUN+NOUN",
66
+ "54": "I-NUM",
67
+ "55": "I-PART",
68
+ "56": "I-PRON",
69
+ "57": "I-PROPN",
70
+ "58": "I-PROPN+ADP",
71
+ "59": "I-PUNCT",
72
+ "60": "I-SCONJ",
73
+ "61": "I-SYM",
74
+ "62": "I-VERB",
75
+ "63": "I-VERB+AUX",
76
+ "64": "I-VERB+SCONJ",
77
+ "65": "I-X",
78
+ "66": "INTJ",
79
+ "67": "NOUN",
80
+ "68": "NOUN|Polarity=Neg",
81
+ "69": "NUM",
82
+ "70": "PART",
83
+ "71": "PRON",
84
+ "72": "PROPN",
85
+ "73": "PUNCT",
86
+ "74": "SCONJ",
87
+ "75": "SYM",
88
+ "76": "VERB",
89
+ "77": "VERB+AUX",
90
+ "78": "X"
91
  },
92
  "initializer_range": 0.02,
93
  "intermediate_size": 3072,
 
100
  "ADP+VERB": 5,
101
  "ADV": 6,
102
  "AUX": 7,
103
+ "AUX|Polarity=Neg": 8,
104
+ "B-ADJ": 9,
105
+ "B-ADJ+VERB": 10,
106
+ "B-ADP": 11,
107
+ "B-ADP+ADJ": 12,
108
+ "B-ADP+NOUN+ADP": 13,
109
+ "B-ADV": 14,
110
+ "B-AUX": 15,
111
+ "B-AUX+AUX": 16,
112
+ "B-AUX+NOUN": 17,
113
+ "B-AUX|Polarity=Neg": 18,
114
+ "B-CCONJ": 19,
115
+ "B-INTJ": 20,
116
+ "B-NOUN": 21,
117
+ "B-NOUN+ADP": 22,
118
+ "B-NOUN+NOUN": 23,
119
+ "B-NUM": 24,
120
+ "B-PART": 25,
121
+ "B-PRON": 26,
122
+ "B-PROPN": 27,
123
+ "B-PROPN+ADP": 28,
124
+ "B-PUNCT": 29,
125
+ "B-SCONJ": 30,
126
+ "B-SYM": 31,
127
+ "B-VERB": 32,
128
+ "B-VERB+AUX": 33,
129
+ "B-VERB+SCONJ": 34,
130
+ "B-X": 35,
131
+ "CCONJ": 36,
132
+ "DET": 37,
133
+ "DET+NOUN": 38,
134
+ "I-ADJ": 39,
135
+ "I-ADJ+VERB": 40,
136
+ "I-ADP": 41,
137
+ "I-ADP+ADJ": 42,
138
+ "I-ADP+NOUN+ADP": 43,
139
+ "I-ADV": 44,
140
+ "I-AUX": 45,
141
+ "I-AUX+AUX": 46,
142
+ "I-AUX+NOUN": 47,
143
+ "I-AUX|Polarity=Neg": 48,
144
+ "I-CCONJ": 49,
145
+ "I-INTJ": 50,
146
+ "I-NOUN": 51,
147
+ "I-NOUN+ADP": 52,
148
+ "I-NOUN+NOUN": 53,
149
+ "I-NUM": 54,
150
+ "I-PART": 55,
151
+ "I-PRON": 56,
152
+ "I-PROPN": 57,
153
+ "I-PROPN+ADP": 58,
154
+ "I-PUNCT": 59,
155
+ "I-SCONJ": 60,
156
+ "I-SYM": 61,
157
+ "I-VERB": 62,
158
+ "I-VERB+AUX": 63,
159
+ "I-VERB+SCONJ": 64,
160
+ "I-X": 65,
161
+ "INTJ": 66,
162
+ "NOUN": 67,
163
+ "NOUN|Polarity=Neg": 68,
164
+ "NUM": 69,
165
+ "PART": 70,
166
+ "PRON": 71,
167
+ "PROPN": 72,
168
+ "PUNCT": 73,
169
+ "SCONJ": 74,
170
+ "SYM": 75,
171
+ "VERB": 76,
172
+ "VERB+AUX": 77,
173
+ "X": 78
174
  },
175
  "layer_norm_eps": 1e-07,
176
  "max_position_embeddings": 512,
 
301
  },
302
  "tokenizer_class": "BertJapaneseTokenizer",
303
  "torch_dtype": "float32",
304
+ "transformers_version": "4.19.4",
305
  "type_vocab_size": 0,
306
  "vocab_size": 32000
307
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6f2f96ca2cb845c7d5bb259b9f437ecf4a20dd87253d0e7e98010d4d244cce3
3
- size 440402547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0093680f12007734ce2ff94ad01ca31c4f4002915fe162806aeb6f6074cfe8ed
3
+ size 440414835
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:769e3144ddf44f3ab4b191aafa5aa8e5a695dc99cf1e8538da6aa4f8f267aa23
3
- size 488904747
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a9def51a3a3c8de34698a153d29ebfa02b4569f671455bdd266fd4901cc5e7
3
+ size 488904683