adrianeboyd committed on
Commit
3546373
1 Parent(s): 9179a65

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,62 +14,62 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8161157025
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8229166667
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8195020747
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9641646489
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.9641646489
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
- value: 0.9538014528
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
- value: 0.9518644068
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
- value: 0.8220111732
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
- value: 0.7817440366
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
- value: 0.9055258467
73
  ---
74
  ### Details: https://spacy.io/models/da#da_core_news_lg
75
 
@@ -78,8 +78,8 @@ Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, l
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `da_core_news_lg` |
81
- | **Version** | `3.4.0` |
82
- | **spaCy** | `>=3.4.0,<3.5.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `lemmatizer`, `attribute_ruler`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `lemmatizer`, `senter`, `attribute_ruler`, `ner` |
85
  | **Vectors** | 500000 keys, 500000 unique vectors (300 dimensions) |
@@ -105,22 +105,22 @@ Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, l
105
 
106
  | Type | Score |
107
  | --- | --- |
108
- | `TOKEN_ACC` | 99.95 |
109
  | `TOKEN_P` | 99.78 |
110
  | `TOKEN_R` | 99.75 |
111
  | `TOKEN_F` | 99.76 |
112
- | `POS_ACC` | 96.42 |
113
- | `MORPH_ACC` | 95.38 |
114
- | `MORPH_MICRO_P` | 97.06 |
115
- | `MORPH_MICRO_R` | 96.42 |
116
- | `MORPH_MICRO_F` | 96.74 |
117
- | `SENTS_P` | 91.04 |
118
- | `SENTS_R` | 90.07 |
119
- | `SENTS_F` | 90.55 |
120
- | `DEP_UAS` | 82.20 |
121
- | `DEP_LAS` | 78.17 |
122
- | `LEMMA_ACC` | 95.19 |
123
- | `TAG_ACC` | 96.42 |
124
- | `ENTS_P` | 81.61 |
125
- | `ENTS_R` | 82.29 |
126
- | `ENTS_F` | 81.95 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.800407332
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.81875
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8094747683
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9665859564
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.9665859564
38
  - task:
39
  name: MORPH
40
  type: token-classification
41
  metrics:
42
  - name: Morph (UFeats) Accuracy
43
  type: accuracy
44
+ value: 0.9573849879
45
  - task:
46
  name: LEMMA
47
  type: token-classification
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
+ value: 0.948377724
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
+ value: 0.8225238813
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
+ value: 0.7828612927
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
+ value: 0.8869100623
73
  ---
74
  ### Details: https://spacy.io/models/da#da_core_news_lg
75
 
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `da_core_news_lg` |
81
+ | **Version** | `3.5.0` |
82
+ | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `lemmatizer`, `attribute_ruler`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `lemmatizer`, `senter`, `attribute_ruler`, `ner` |
85
  | **Vectors** | 500000 keys, 500000 unique vectors (300 dimensions) |
 
105
 
106
  | Type | Score |
107
  | --- | --- |
108
+ | `TOKEN_ACC` | 99.89 |
109
  | `TOKEN_P` | 99.78 |
110
  | `TOKEN_R` | 99.75 |
111
  | `TOKEN_F` | 99.76 |
112
+ | `POS_ACC` | 96.66 |
113
+ | `MORPH_ACC` | 95.74 |
114
+ | `MORPH_MICRO_P` | 97.43 |
115
+ | `MORPH_MICRO_R` | 96.75 |
116
+ | `MORPH_MICRO_F` | 97.09 |
117
+ | `SENTS_P` | 89.09 |
118
+ | `SENTS_R` | 88.30 |
119
+ | `SENTS_F` | 88.69 |
120
+ | `DEP_UAS` | 82.25 |
121
+ | `DEP_LAS` | 78.29 |
122
+ | `LEMMA_ACC` | 94.84 |
123
+ | `TAG_ACC` | 96.66 |
124
+ | `ENTS_P` | 80.04 |
125
+ | `ENTS_R` | 81.88 |
126
+ | `ENTS_F` | 80.95 |
accuracy.json CHANGED
@@ -1,53 +1,53 @@
1
  {
2
- "token_acc": 0.9994672349,
3
  "token_p": 0.9977732598,
4
  "token_r": 0.9974835463,
5
  "token_f": 0.997628382,
6
- "pos_acc": 0.9641646489,
7
- "morph_acc": 0.9538014528,
8
- "morph_micro_p": 0.9705792683,
9
- "morph_micro_r": 0.9641613245,
10
- "morph_micro_f": 0.9673596516,
11
  "morph_per_feat": {
12
  "Mood": {
13
- "p": 0.9789473684,
14
- "r": 0.97521449,
15
- "f": 0.9770773639
16
  },
17
  "Tense": {
18
- "p": 0.9743396226,
19
- "r": 0.9721385542,
20
- "f": 0.973237844
21
  },
22
  "VerbForm": {
23
- "p": 0.9649014778,
24
- "r": 0.958996328,
25
- "f": 0.9619398404
26
  },
27
  "Voice": {
28
- "p": 0.9789631856,
29
- "r": 0.9738415546,
30
- "f": 0.9763956538
31
  },
32
  "Definite": {
33
- "p": 0.9642147117,
34
- "r": 0.9581193204,
35
- "f": 0.9611573524
36
  },
37
  "Gender": {
38
- "p": 0.9558823529,
39
- "r": 0.9504818877,
40
- "f": 0.9531744709
41
  },
42
  "Number": {
43
- "p": 0.96478318,
44
- "r": 0.9574856547,
45
- "f": 0.9611205655
46
  },
47
  "AdpType": {
48
  "p": 1.0,
49
- "r": 0.9920424403,
50
- "f": 0.9960053262
51
  },
52
  "PartType": {
53
  "p": 1.0,
@@ -55,29 +55,29 @@
55
  "f": 1.0
56
  },
57
  "Case": {
58
- "p": 0.9791666667,
59
- "r": 0.9652448657,
60
- "f": 0.9721559268
61
  },
62
  "Person": {
63
- "p": 0.9857651246,
64
- "r": 0.9840142096,
65
- "f": 0.9848888889
66
  },
67
  "PronType": {
68
- "p": 0.9868095631,
69
- "r": 0.984375,
70
- "f": 0.9855907781
71
  },
72
  "NumType": {
73
- "p": 0.972972973,
74
- "r": 0.9536423841,
75
- "f": 0.9632107023
76
  },
77
  "Degree": {
78
- "p": 0.9524969549,
79
- "r": 0.9421686747,
80
- "f": 0.9473046638
81
  },
82
  "Reflex": {
83
  "p": 1.0,
@@ -110,146 +110,146 @@
110
  "f": 1.0
111
  },
112
  "Polite": {
113
- "p": 0.75,
114
- "r": 0.75,
115
- "f": 0.75
116
  }
117
  },
118
- "sents_p": 0.9103942652,
119
- "sents_r": 0.9007092199,
120
- "sents_f": 0.9055258467,
121
- "dep_uas": 0.8220111732,
122
- "dep_las": 0.7817440366,
123
  "dep_las_per_type": {
124
  "advmod": {
125
- "p": 0.6882758621,
126
- "r": 0.7048022599,
127
- "f": 0.6964410328
128
  },
129
  "root": {
130
- "p": 0.8369175627,
131
- "r": 0.8280141844,
132
- "f": 0.8324420677
133
  },
134
  "nsubj": {
135
- "p": 0.8381256656,
136
- "r": 0.8301687764,
137
- "f": 0.8341282459
138
  },
139
  "case": {
140
- "p": 0.8997020854,
141
- "r": 0.8934911243,
142
- "f": 0.8965858486
143
  },
144
  "obl": {
145
- "p": 0.7044728435,
146
- "r": 0.6847826087,
147
- "f": 0.694488189
148
  },
149
  "cc": {
150
- "p": 0.7735294118,
151
- "r": 0.7645348837,
152
- "f": 0.769005848
153
  },
154
  "conj": {
155
- "p": 0.6239782016,
156
- "r": 0.6106666667,
157
- "f": 0.6172506739
158
  },
159
  "obj": {
160
- "p": 0.7985347985,
161
- "r": 0.8466019417,
162
- "f": 0.821866164
163
  },
164
  "aux": {
165
- "p": 0.8735294118,
166
- "r": 0.8658892128,
167
- "f": 0.8696925329
168
  },
169
  "acl:relcl": {
170
- "p": 0.6271186441,
171
- "r": 0.6,
172
- "f": 0.6132596685
173
  },
174
  "advmod:lmod": {
175
- "p": 0.7014925373,
176
- "r": 0.7014925373,
177
- "f": 0.7014925373
178
  },
179
  "det": {
180
- "p": 0.9129720854,
181
- "r": 0.9159802306,
182
- "f": 0.9144736842
183
  },
184
  "amod": {
185
- "p": 0.8073089701,
186
- "r": 0.8293515358,
187
- "f": 0.8181818182
188
  },
189
  "nmod:poss": {
190
- "p": 0.7741935484,
191
- "r": 0.7128712871,
192
- "f": 0.7422680412
193
  },
194
  "ccomp": {
195
- "p": 0.6865671642,
196
- "r": 0.7419354839,
197
- "f": 0.7131782946
198
  },
199
  "nummod": {
200
- "p": 0.8536585366,
201
- "r": 0.875,
202
- "f": 0.8641975309
203
  },
204
  "flat": {
205
- "p": 0.7784431138,
206
- "r": 0.8609271523,
207
- "f": 0.8176100629
208
  },
209
  "compound:prt": {
210
- "p": 0.4411764706,
211
- "r": 0.3658536585,
212
- "f": 0.4
213
  },
214
  "advcl": {
215
- "p": 0.6120689655,
216
  "r": 0.6120689655,
217
- "f": 0.6120689655
218
  },
219
  "mark": {
220
- "p": 0.889596603,
221
- "r": 0.8603696099,
222
- "f": 0.8747390397
223
  },
224
  "cop": {
225
- "p": 0.8021978022,
226
- "r": 0.8342857143,
227
- "f": 0.8179271709
228
  },
229
  "dep": {
230
- "p": 0.1304347826,
231
- "r": 0.2264150943,
232
- "f": 0.1655172414
233
  },
234
  "nmod": {
235
- "p": 0.6620825147,
236
- "r": 0.658203125,
237
- "f": 0.6601371205
238
  },
239
  "iobj": {
240
- "p": 0.8333333333,
241
- "r": 0.4545454545,
242
- "f": 0.5882352941
243
  },
244
  "xcomp": {
245
- "p": 0.5365853659,
246
- "r": 0.3728813559,
247
- "f": 0.44
248
  },
249
  "list": {
250
- "p": 0.3333333333,
251
  "r": 0.2222222222,
252
- "f": 0.2666666667
253
  },
254
  "vocative": {
255
  "p": 0.0,
@@ -257,14 +257,14 @@
257
  "f": 0.0
258
  },
259
  "fixed": {
260
- "p": 0.8461538462,
261
- "r": 0.8048780488,
262
- "f": 0.825
263
  },
264
  "expl": {
265
- "p": 0.8484848485,
266
- "r": 0.8235294118,
267
- "f": 0.8358208955
268
  },
269
  "appos": {
270
  "p": 0.5862068966,
@@ -272,9 +272,9 @@
272
  "f": 0.5483870968
273
  },
274
  "obl:tmod": {
275
- "p": 0.8333333333,
276
- "r": 0.2777777778,
277
- "f": 0.4166666667
278
  },
279
  "discourse": {
280
  "p": 0.0,
@@ -287,32 +287,32 @@
287
  "f": 0.0
288
  }
289
  },
290
- "lemma_acc": 0.9518644068,
291
- "tag_acc": 0.9641646489,
292
- "ents_p": 0.8161157025,
293
- "ents_r": 0.8229166667,
294
- "ents_f": 0.8195020747,
295
  "ents_per_type": {
296
  "PER": {
297
- "p": 0.9230769231,
298
- "r": 0.8674698795,
299
- "f": 0.8944099379
300
  },
301
  "ORG": {
302
- "p": 0.7528089888,
303
- "r": 0.7444444444,
304
- "f": 0.748603352
305
  },
306
  "MISC": {
307
- "p": 0.6910569106,
308
- "r": 0.7522123894,
309
- "f": 0.7203389831
310
  },
311
  "LOC": {
312
- "p": 0.8534482759,
313
- "r": 0.8918918919,
314
- "f": 0.872246696
315
  }
316
  },
317
- "speed": 12175.9946774514
318
  }
 
1
  {
2
+ "token_acc": 0.9989350373,
3
  "token_p": 0.9977732598,
4
  "token_r": 0.9974835463,
5
  "token_f": 0.997628382,
6
+ "pos_acc": 0.9665859564,
7
+ "morph_acc": 0.9573849879,
8
+ "morph_micro_p": 0.9742794693,
9
+ "morph_micro_r": 0.967492807,
10
+ "morph_micro_f": 0.9708742782,
11
  "morph_per_feat": {
12
  "Mood": {
13
+ "p": 0.982791587,
14
+ "r": 0.9799809342,
15
+ "f": 0.9813842482
16
  },
17
  "Tense": {
18
+ "p": 0.9796072508,
19
+ "r": 0.9766566265,
20
+ "f": 0.9781297134
21
  },
22
  "VerbForm": {
23
+ "p": 0.9710412816,
24
+ "r": 0.964504284,
25
+ "f": 0.9677617439
26
  },
27
  "Voice": {
28
+ "p": 0.983495874,
29
+ "r": 0.9798206278,
30
+ "f": 0.9816548109
31
  },
32
  "Definite": {
33
+ "p": 0.9669585987,
34
+ "r": 0.9596997234,
35
+ "f": 0.9633154868
36
  },
37
  "Gender": {
38
+ "p": 0.9589315526,
39
+ "r": 0.9544699236,
40
+ "f": 0.9566955363
41
  },
42
  "Number": {
43
+ "p": 0.967648606,
44
+ "r": 0.9595722483,
45
+ "f": 0.9635935045
46
  },
47
  "AdpType": {
48
  "p": 1.0,
49
+ "r": 0.9893899204,
50
+ "f": 0.9946666667
51
  },
52
  "PartType": {
53
  "p": 1.0,
 
55
  "f": 1.0
56
  },
57
  "Case": {
58
+ "p": 0.9856,
59
+ "r": 0.9731437599,
60
+ "f": 0.9793322734
61
  },
62
  "Person": {
63
+ "p": 0.9858156028,
64
+ "r": 0.9875666075,
65
+ "f": 0.9866903283
66
  },
67
  "PronType": {
68
+ "p": 0.9876441516,
69
+ "r": 0.9860197368,
70
+ "f": 0.9868312757
71
  },
72
  "NumType": {
73
+ "p": 0.9863945578,
74
+ "r": 0.9602649007,
75
+ "f": 0.9731543624
76
  },
77
  "Degree": {
78
+ "p": 0.9657701711,
79
+ "r": 0.9518072289,
80
+ "f": 0.9587378641
81
  },
82
  "Reflex": {
83
  "p": 1.0,
 
110
  "f": 1.0
111
  },
112
  "Polite": {
113
+ "p": 1.0,
114
+ "r": 0.5,
115
+ "f": 0.6666666667
116
  }
117
  },
118
+ "sents_p": 0.8908765653,
119
+ "sents_r": 0.8829787234,
120
+ "sents_f": 0.8869100623,
121
+ "dep_uas": 0.8225238813,
122
+ "dep_las": 0.7828612927,
123
  "dep_las_per_type": {
124
  "advmod": {
125
+ "p": 0.6876675603,
126
+ "r": 0.7245762712,
127
+ "f": 0.7056396149
128
  },
129
  "root": {
130
+ "p": 0.8240574506,
131
+ "r": 0.8138297872,
132
+ "f": 0.818911686
133
  },
134
  "nsubj": {
135
+ "p": 0.8513800425,
136
+ "r": 0.8459915612,
137
+ "f": 0.8486772487
138
  },
139
  "case": {
140
+ "p": 0.8941641939,
141
+ "r": 0.8915187377,
142
+ "f": 0.8928395062
143
  },
144
  "obl": {
145
+ "p": 0.7017828201,
146
+ "r": 0.6723602484,
147
+ "f": 0.6867565424
148
  },
149
  "cc": {
150
+ "p": 0.795389049,
151
+ "r": 0.8023255814,
152
+ "f": 0.7988422576
153
  },
154
  "conj": {
155
+ "p": 0.5918918919,
156
+ "r": 0.584,
157
+ "f": 0.5879194631
158
  },
159
  "obj": {
160
+ "p": 0.7781690141,
161
+ "r": 0.8582524272,
162
+ "f": 0.8162511542
163
  },
164
  "aux": {
165
+ "p": 0.8922155689,
166
+ "r": 0.8688046647,
167
+ "f": 0.8803545052
168
  },
169
  "acl:relcl": {
170
+ "p": 0.606741573,
171
+ "r": 0.5837837838,
172
+ "f": 0.5950413223
173
  },
174
  "advmod:lmod": {
175
+ "p": 0.7627118644,
176
+ "r": 0.671641791,
177
+ "f": 0.7142857143
178
  },
179
  "det": {
180
+ "p": 0.9247135843,
181
+ "r": 0.9308072488,
182
+ "f": 0.9277504105
183
  },
184
  "amod": {
185
+ "p": 0.8291032149,
186
+ "r": 0.8361774744,
187
+ "f": 0.8326253186
188
  },
189
  "nmod:poss": {
190
+ "p": 0.7052631579,
191
+ "r": 0.6633663366,
192
+ "f": 0.6836734694
193
  },
194
  "ccomp": {
195
+ "p": 0.5555555556,
196
+ "r": 0.6451612903,
197
+ "f": 0.5970149254
198
  },
199
  "nummod": {
200
+ "p": 0.811023622,
201
+ "r": 0.8583333333,
202
+ "f": 0.8340080972
203
  },
204
  "flat": {
205
+ "p": 0.7743902439,
206
+ "r": 0.8410596026,
207
+ "f": 0.8063492063
208
  },
209
  "compound:prt": {
210
+ "p": 0.3888888889,
211
+ "r": 0.3414634146,
212
+ "f": 0.3636363636
213
  },
214
  "advcl": {
215
+ "p": 0.6635514019,
216
  "r": 0.6120689655,
217
+ "f": 0.6367713004
218
  },
219
  "mark": {
220
+ "p": 0.8902953586,
221
+ "r": 0.8665297741,
222
+ "f": 0.878251821
223
  },
224
  "cop": {
225
+ "p": 0.8222222222,
226
+ "r": 0.8457142857,
227
+ "f": 0.8338028169
228
  },
229
  "dep": {
230
+ "p": 0.1111111111,
231
+ "r": 0.1509433962,
232
+ "f": 0.128
233
  },
234
  "nmod": {
235
+ "p": 0.6686626747,
236
+ "r": 0.654296875,
237
+ "f": 0.6614017769
238
  },
239
  "iobj": {
240
+ "p": 0.9,
241
+ "r": 0.4090909091,
242
+ "f": 0.5625
243
  },
244
  "xcomp": {
245
+ "p": 0.4468085106,
246
+ "r": 0.3559322034,
247
+ "f": 0.3962264151
248
  },
249
  "list": {
250
+ "p": 0.5,
251
  "r": 0.2222222222,
252
+ "f": 0.3076923077
253
  },
254
  "vocative": {
255
  "p": 0.0,
 
257
  "f": 0.0
258
  },
259
  "fixed": {
260
+ "p": 0.8888888889,
261
+ "r": 0.7804878049,
262
+ "f": 0.8311688312
263
  },
264
  "expl": {
265
+ "p": 0.8529411765,
266
+ "r": 0.8529411765,
267
+ "f": 0.8529411765
268
  },
269
  "appos": {
270
  "p": 0.5862068966,
 
272
  "f": 0.5483870968
273
  },
274
  "obl:tmod": {
275
+ "p": 0.75,
276
+ "r": 0.3333333333,
277
+ "f": 0.4615384615
278
  },
279
  "discourse": {
280
  "p": 0.0,
 
287
  "f": 0.0
288
  }
289
  },
290
+ "lemma_acc": 0.948377724,
291
+ "tag_acc": 0.9665859564,
292
+ "ents_p": 0.800407332,
293
+ "ents_r": 0.81875,
294
+ "ents_f": 0.8094747683,
295
  "ents_per_type": {
296
  "PER": {
297
+ "p": 0.893081761,
298
+ "r": 0.8554216867,
299
+ "f": 0.8738461538
300
  },
301
  "ORG": {
302
+ "p": 0.7222222222,
303
+ "r": 0.7222222222,
304
+ "f": 0.7222222222
305
  },
306
  "MISC": {
307
+ "p": 0.6771653543,
308
+ "r": 0.7610619469,
309
+ "f": 0.7166666667
310
  },
311
  "LOC": {
312
+ "p": 0.8695652174,
313
+ "r": 0.9009009009,
314
+ "f": 0.8849557522
315
  }
316
  },
317
+ "speed": 12117.291936941
318
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -81,8 +81,8 @@ nO = null
81
  [components.ner.model.tok2vec.embed]
82
  @architectures = "spacy.MultiHashEmbed.v2"
83
  width = 96
84
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
85
- rows = [5000,1000,2500,2500,50]
86
  include_static_vectors = true
87
 
88
  [components.ner.model.tok2vec.encode]
@@ -150,8 +150,8 @@ factory = "tok2vec"
150
  [components.tok2vec.model.embed]
151
  @architectures = "spacy.MultiHashEmbed.v2"
152
  width = ${components.tok2vec.model.encode:width}
153
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
154
- rows = [5000,1000,2500,2500,50]
155
  include_static_vectors = true
156
 
157
  [components.tok2vec.model.encode]
@@ -193,6 +193,7 @@ eval_frequency = 1000
193
  frozen_components = []
194
  before_to_disk = null
195
  annotating_components = []
 
196
 
197
  [training.batcher]
198
  @batchers = "spacy.batch_by_words.v1"
 
81
  [components.ner.model.tok2vec.embed]
82
  @architectures = "spacy.MultiHashEmbed.v2"
83
  width = 96
84
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
85
+ rows = [5000,1000,2500,2500]
86
  include_static_vectors = true
87
 
88
  [components.ner.model.tok2vec.encode]
 
150
  [components.tok2vec.model.embed]
151
  @architectures = "spacy.MultiHashEmbed.v2"
152
  width = ${components.tok2vec.model.encode:width}
153
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
154
+ rows = [5000,1000,2500,2500,50,50]
155
  include_static_vectors = true
156
 
157
  [components.tok2vec.model.encode]
 
193
  frozen_components = []
194
  before_to_disk = null
195
  annotating_components = []
196
+ before_update = null
197
 
198
  [training.batcher]
199
  @batchers = "spacy.batch_by_words.v1"
da_core_news_lg-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e579a353ad8e99bf8147886c23dd03a8ec936c221a0670eb4d69ead8a2c85e6a
3
- size 567080666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a4324a1fb101b9cba153037e37b072c9520704867cee55ff5cbf7979e89792
3
+ size 567067113
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3829adc1ce8feafa2cc991916850f93f546fd7a93605155c201240dbc31ea7c7
3
  size 175818
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9da20906ff7f67721b6f29e00be5d84df9c60fef30c9be81a78b39edfcffc2
3
  size 175818
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"da",
3
  "name":"core_news_lg",
4
- "version":"3.4.0",
5
  "description":"Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
- "spacy_version":">=3.4.0,<3.5.0",
11
- "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":300,
14
  "vectors":500000,
@@ -244,55 +244,55 @@
244
  "senter"
245
  ],
246
  "performance":{
247
- "token_acc":0.9994672349,
248
  "token_p":0.9977732598,
249
  "token_r":0.9974835463,
250
  "token_f":0.997628382,
251
- "pos_acc":0.9641646489,
252
- "morph_acc":0.9538014528,
253
- "morph_micro_p":0.9705792683,
254
- "morph_micro_r":0.9641613245,
255
- "morph_micro_f":0.9673596516,
256
  "morph_per_feat":{
257
  "Mood":{
258
- "p":0.9789473684,
259
- "r":0.97521449,
260
- "f":0.9770773639
261
  },
262
  "Tense":{
263
- "p":0.9743396226,
264
- "r":0.9721385542,
265
- "f":0.973237844
266
  },
267
  "VerbForm":{
268
- "p":0.9649014778,
269
- "r":0.958996328,
270
- "f":0.9619398404
271
  },
272
  "Voice":{
273
- "p":0.9789631856,
274
- "r":0.9738415546,
275
- "f":0.9763956538
276
  },
277
  "Definite":{
278
- "p":0.9642147117,
279
- "r":0.9581193204,
280
- "f":0.9611573524
281
  },
282
  "Gender":{
283
- "p":0.9558823529,
284
- "r":0.9504818877,
285
- "f":0.9531744709
286
  },
287
  "Number":{
288
- "p":0.96478318,
289
- "r":0.9574856547,
290
- "f":0.9611205655
291
  },
292
  "AdpType":{
293
  "p":1.0,
294
- "r":0.9920424403,
295
- "f":0.9960053262
296
  },
297
  "PartType":{
298
  "p":1.0,
@@ -300,29 +300,29 @@
300
  "f":1.0
301
  },
302
  "Case":{
303
- "p":0.9791666667,
304
- "r":0.9652448657,
305
- "f":0.9721559268
306
  },
307
  "Person":{
308
- "p":0.9857651246,
309
- "r":0.9840142096,
310
- "f":0.9848888889
311
  },
312
  "PronType":{
313
- "p":0.9868095631,
314
- "r":0.984375,
315
- "f":0.9855907781
316
  },
317
  "NumType":{
318
- "p":0.972972973,
319
- "r":0.9536423841,
320
- "f":0.9632107023
321
  },
322
  "Degree":{
323
- "p":0.9524969549,
324
- "r":0.9421686747,
325
- "f":0.9473046638
326
  },
327
  "Reflex":{
328
  "p":1.0,
@@ -355,146 +355,146 @@
355
  "f":1.0
356
  },
357
  "Polite":{
358
- "p":0.75,
359
- "r":0.75,
360
- "f":0.75
361
  }
362
  },
363
- "sents_p":0.9103942652,
364
- "sents_r":0.9007092199,
365
- "sents_f":0.9055258467,
366
- "dep_uas":0.8220111732,
367
- "dep_las":0.7817440366,
368
  "dep_las_per_type":{
369
  "advmod":{
370
- "p":0.6882758621,
371
- "r":0.7048022599,
372
- "f":0.6964410328
373
  },
374
  "root":{
375
- "p":0.8369175627,
376
- "r":0.8280141844,
377
- "f":0.8324420677
378
  },
379
  "nsubj":{
380
- "p":0.8381256656,
381
- "r":0.8301687764,
382
- "f":0.8341282459
383
  },
384
  "case":{
385
- "p":0.8997020854,
386
- "r":0.8934911243,
387
- "f":0.8965858486
388
  },
389
  "obl":{
390
- "p":0.7044728435,
391
- "r":0.6847826087,
392
- "f":0.694488189
393
  },
394
  "cc":{
395
- "p":0.7735294118,
396
- "r":0.7645348837,
397
- "f":0.769005848
398
  },
399
  "conj":{
400
- "p":0.6239782016,
401
- "r":0.6106666667,
402
- "f":0.6172506739
403
  },
404
  "obj":{
405
- "p":0.7985347985,
406
- "r":0.8466019417,
407
- "f":0.821866164
408
  },
409
  "aux":{
410
- "p":0.8735294118,
411
- "r":0.8658892128,
412
- "f":0.8696925329
413
  },
414
  "acl:relcl":{
415
- "p":0.6271186441,
416
- "r":0.6,
417
- "f":0.6132596685
418
  },
419
  "advmod:lmod":{
420
- "p":0.7014925373,
421
- "r":0.7014925373,
422
- "f":0.7014925373
423
  },
424
  "det":{
425
- "p":0.9129720854,
426
- "r":0.9159802306,
427
- "f":0.9144736842
428
  },
429
  "amod":{
430
- "p":0.8073089701,
431
- "r":0.8293515358,
432
- "f":0.8181818182
433
  },
434
  "nmod:poss":{
435
- "p":0.7741935484,
436
- "r":0.7128712871,
437
- "f":0.7422680412
438
  },
439
  "ccomp":{
440
- "p":0.6865671642,
441
- "r":0.7419354839,
442
- "f":0.7131782946
443
  },
444
  "nummod":{
445
- "p":0.8536585366,
446
- "r":0.875,
447
- "f":0.8641975309
448
  },
449
  "flat":{
450
- "p":0.7784431138,
451
- "r":0.8609271523,
452
- "f":0.8176100629
453
  },
454
  "compound:prt":{
455
- "p":0.4411764706,
456
- "r":0.3658536585,
457
- "f":0.4
458
  },
459
  "advcl":{
460
- "p":0.6120689655,
461
  "r":0.6120689655,
462
- "f":0.6120689655
463
  },
464
  "mark":{
465
- "p":0.889596603,
466
- "r":0.8603696099,
467
- "f":0.8747390397
468
  },
469
  "cop":{
470
- "p":0.8021978022,
471
- "r":0.8342857143,
472
- "f":0.8179271709
473
  },
474
  "dep":{
475
- "p":0.1304347826,
476
- "r":0.2264150943,
477
- "f":0.1655172414
478
  },
479
  "nmod":{
480
- "p":0.6620825147,
481
- "r":0.658203125,
482
- "f":0.6601371205
483
  },
484
  "iobj":{
485
- "p":0.8333333333,
486
- "r":0.4545454545,
487
- "f":0.5882352941
488
  },
489
  "xcomp":{
490
- "p":0.5365853659,
491
- "r":0.3728813559,
492
- "f":0.44
493
  },
494
  "list":{
495
- "p":0.3333333333,
496
  "r":0.2222222222,
497
- "f":0.2666666667
498
  },
499
  "vocative":{
500
  "p":0.0,
@@ -502,14 +502,14 @@
502
  "f":0.0
503
  },
504
  "fixed":{
505
- "p":0.8461538462,
506
- "r":0.8048780488,
507
- "f":0.825
508
  },
509
  "expl":{
510
- "p":0.8484848485,
511
- "r":0.8235294118,
512
- "f":0.8358208955
513
  },
514
  "appos":{
515
  "p":0.5862068966,
@@ -517,9 +517,9 @@
517
  "f":0.5483870968
518
  },
519
  "obl:tmod":{
520
- "p":0.8333333333,
521
- "r":0.2777777778,
522
- "f":0.4166666667
523
  },
524
  "discourse":{
525
  "p":0.0,
@@ -532,34 +532,34 @@
532
  "f":0.0
533
  }
534
  },
535
- "lemma_acc":0.9518644068,
536
- "tag_acc":0.9641646489,
537
- "ents_p":0.8161157025,
538
- "ents_r":0.8229166667,
539
- "ents_f":0.8195020747,
540
  "ents_per_type":{
541
  "PER":{
542
- "p":0.9230769231,
543
- "r":0.8674698795,
544
- "f":0.8944099379
545
  },
546
  "ORG":{
547
- "p":0.7528089888,
548
- "r":0.7444444444,
549
- "f":0.748603352
550
  },
551
  "MISC":{
552
- "p":0.6910569106,
553
- "r":0.7522123894,
554
- "f":0.7203389831
555
  },
556
  "LOC":{
557
- "p":0.8534482759,
558
- "r":0.8918918919,
559
- "f":0.872246696
560
  }
561
  },
562
- "speed":12175.9946774514
563
  },
564
  "sources":[
565
  {
 
1
  {
2
  "lang":"da",
3
  "name":"core_news_lg",
4
+ "version":"3.5.0",
5
  "description":"Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":500000,
 
244
  "senter"
245
  ],
246
  "performance":{
247
+ "token_acc":0.9989350373,
248
  "token_p":0.9977732598,
249
  "token_r":0.9974835463,
250
  "token_f":0.997628382,
251
+ "pos_acc":0.9665859564,
252
+ "morph_acc":0.9573849879,
253
+ "morph_micro_p":0.9742794693,
254
+ "morph_micro_r":0.967492807,
255
+ "morph_micro_f":0.9708742782,
256
  "morph_per_feat":{
257
  "Mood":{
258
+ "p":0.982791587,
259
+ "r":0.9799809342,
260
+ "f":0.9813842482
261
  },
262
  "Tense":{
263
+ "p":0.9796072508,
264
+ "r":0.9766566265,
265
+ "f":0.9781297134
266
  },
267
  "VerbForm":{
268
+ "p":0.9710412816,
269
+ "r":0.964504284,
270
+ "f":0.9677617439
271
  },
272
  "Voice":{
273
+ "p":0.983495874,
274
+ "r":0.9798206278,
275
+ "f":0.9816548109
276
  },
277
  "Definite":{
278
+ "p":0.9669585987,
279
+ "r":0.9596997234,
280
+ "f":0.9633154868
281
  },
282
  "Gender":{
283
+ "p":0.9589315526,
284
+ "r":0.9544699236,
285
+ "f":0.9566955363
286
  },
287
  "Number":{
288
+ "p":0.967648606,
289
+ "r":0.9595722483,
290
+ "f":0.9635935045
291
  },
292
  "AdpType":{
293
  "p":1.0,
294
+ "r":0.9893899204,
295
+ "f":0.9946666667
296
  },
297
  "PartType":{
298
  "p":1.0,
 
300
  "f":1.0
301
  },
302
  "Case":{
303
+ "p":0.9856,
304
+ "r":0.9731437599,
305
+ "f":0.9793322734
306
  },
307
  "Person":{
308
+ "p":0.9858156028,
309
+ "r":0.9875666075,
310
+ "f":0.9866903283
311
  },
312
  "PronType":{
313
+ "p":0.9876441516,
314
+ "r":0.9860197368,
315
+ "f":0.9868312757
316
  },
317
  "NumType":{
318
+ "p":0.9863945578,
319
+ "r":0.9602649007,
320
+ "f":0.9731543624
321
  },
322
  "Degree":{
323
+ "p":0.9657701711,
324
+ "r":0.9518072289,
325
+ "f":0.9587378641
326
  },
327
  "Reflex":{
328
  "p":1.0,
 
355
  "f":1.0
356
  },
357
  "Polite":{
358
+ "p":1.0,
359
+ "r":0.5,
360
+ "f":0.6666666667
361
  }
362
  },
363
+ "sents_p":0.8908765653,
364
+ "sents_r":0.8829787234,
365
+ "sents_f":0.8869100623,
366
+ "dep_uas":0.8225238813,
367
+ "dep_las":0.7828612927,
368
  "dep_las_per_type":{
369
  "advmod":{
370
+ "p":0.6876675603,
371
+ "r":0.7245762712,
372
+ "f":0.7056396149
373
  },
374
  "root":{
375
+ "p":0.8240574506,
376
+ "r":0.8138297872,
377
+ "f":0.818911686
378
  },
379
  "nsubj":{
380
+ "p":0.8513800425,
381
+ "r":0.8459915612,
382
+ "f":0.8486772487
383
  },
384
  "case":{
385
+ "p":0.8941641939,
386
+ "r":0.8915187377,
387
+ "f":0.8928395062
388
  },
389
  "obl":{
390
+ "p":0.7017828201,
391
+ "r":0.6723602484,
392
+ "f":0.6867565424
393
  },
394
  "cc":{
395
+ "p":0.795389049,
396
+ "r":0.8023255814,
397
+ "f":0.7988422576
398
  },
399
  "conj":{
400
+ "p":0.5918918919,
401
+ "r":0.584,
402
+ "f":0.5879194631
403
  },
404
  "obj":{
405
+ "p":0.7781690141,
406
+ "r":0.8582524272,
407
+ "f":0.8162511542
408
  },
409
  "aux":{
410
+ "p":0.8922155689,
411
+ "r":0.8688046647,
412
+ "f":0.8803545052
413
  },
414
  "acl:relcl":{
415
+ "p":0.606741573,
416
+ "r":0.5837837838,
417
+ "f":0.5950413223
418
  },
419
  "advmod:lmod":{
420
+ "p":0.7627118644,
421
+ "r":0.671641791,
422
+ "f":0.7142857143
423
  },
424
  "det":{
425
+ "p":0.9247135843,
426
+ "r":0.9308072488,
427
+ "f":0.9277504105
428
  },
429
  "amod":{
430
+ "p":0.8291032149,
431
+ "r":0.8361774744,
432
+ "f":0.8326253186
433
  },
434
  "nmod:poss":{
435
+ "p":0.7052631579,
436
+ "r":0.6633663366,
437
+ "f":0.6836734694
438
  },
439
  "ccomp":{
440
+ "p":0.5555555556,
441
+ "r":0.6451612903,
442
+ "f":0.5970149254
443
  },
444
  "nummod":{
445
+ "p":0.811023622,
446
+ "r":0.8583333333,
447
+ "f":0.8340080972
448
  },
449
  "flat":{
450
+ "p":0.7743902439,
451
+ "r":0.8410596026,
452
+ "f":0.8063492063
453
  },
454
  "compound:prt":{
455
+ "p":0.3888888889,
456
+ "r":0.3414634146,
457
+ "f":0.3636363636
458
  },
459
  "advcl":{
460
+ "p":0.6635514019,
461
  "r":0.6120689655,
462
+ "f":0.6367713004
463
  },
464
  "mark":{
465
+ "p":0.8902953586,
466
+ "r":0.8665297741,
467
+ "f":0.878251821
468
  },
469
  "cop":{
470
+ "p":0.8222222222,
471
+ "r":0.8457142857,
472
+ "f":0.8338028169
473
  },
474
  "dep":{
475
+ "p":0.1111111111,
476
+ "r":0.1509433962,
477
+ "f":0.128
478
  },
479
  "nmod":{
480
+ "p":0.6686626747,
481
+ "r":0.654296875,
482
+ "f":0.6614017769
483
  },
484
  "iobj":{
485
+ "p":0.9,
486
+ "r":0.4090909091,
487
+ "f":0.5625
488
  },
489
  "xcomp":{
490
+ "p":0.4468085106,
491
+ "r":0.3559322034,
492
+ "f":0.3962264151
493
  },
494
  "list":{
495
+ "p":0.5,
496
  "r":0.2222222222,
497
+ "f":0.3076923077
498
  },
499
  "vocative":{
500
  "p":0.0,
 
502
  "f":0.0
503
  },
504
  "fixed":{
505
+ "p":0.8888888889,
506
+ "r":0.7804878049,
507
+ "f":0.8311688312
508
  },
509
  "expl":{
510
+ "p":0.8529411765,
511
+ "r":0.8529411765,
512
+ "f":0.8529411765
513
  },
514
  "appos":{
515
  "p":0.5862068966,
 
517
  "f":0.5483870968
518
  },
519
  "obl:tmod":{
520
+ "p":0.75,
521
+ "r":0.3333333333,
522
+ "f":0.4615384615
523
  },
524
  "discourse":{
525
  "p":0.0,
 
532
  "f":0.0
533
  }
534
  },
535
+ "lemma_acc":0.948377724,
536
+ "tag_acc":0.9665859564,
537
+ "ents_p":0.800407332,
538
+ "ents_r":0.81875,
539
+ "ents_f":0.8094747683,
540
  "ents_per_type":{
541
  "PER":{
542
+ "p":0.893081761,
543
+ "r":0.8554216867,
544
+ "f":0.8738461538
545
  },
546
  "ORG":{
547
+ "p":0.7222222222,
548
+ "r":0.7222222222,
549
+ "f":0.7222222222
550
  },
551
  "MISC":{
552
+ "p":0.6771653543,
553
+ "r":0.7610619469,
554
+ "f":0.7166666667
555
  },
556
  "LOC":{
557
+ "p":0.8695652174,
558
+ "r":0.9009009009,
559
+ "f":0.8849557522
560
  }
561
  },
562
+ "speed":12117.291936941
563
  },
564
  "sources":[
565
  {
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1883862e52aaf5c8afafe1cea486f9e330d9ad501ae6de50be9a5890c45507be
3
  size 61739
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80570647ee692f4878a313e165d5463a48f80a440c8cda497cf5ec1649c10bee
3
  size 61739
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bae7b8d1ea36a33a0b9bf5eba7943dc15bff312d18d5cca83978f5fcaf2eca12
3
- size 6496592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6631c0b912be78beb5ea59518389b0bfd8c736e78132f725f45e9b593a12fb2b
3
+ size 6366382
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b771fefca92f3487eb2f030cfd74df06ebf5f2a1d83418942285089ce82e8180
3
  size 308728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a213b1dd3b499ed3e7ffac3d627d34904eaaab3d272f031337638ba9b33d2946
3
  size 308728
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abbf6466e28bf155126cd68c6be2543ec6789a3fd20efbeb781e2138b4921972
3
  size 219953
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1943fee850b11a368df6c22207955e43241a45a8937ac6e1000fa6b4558a5d76
3
  size 219953
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed33c877ffd457539da81a8853be55873ff2b2dd8d76714183c9fbe87250550f
3
- size 6365604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0745e0bb12f1667fd91115f003fad1cd890a9e632f4887e5dfa3f4a36caa72
3
+ size 6495793
tokenizer CHANGED
The diff for this file is too large to render. See raw diff
 
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d087b63907f4906a9582161d47cb1148e5cd6ba59a630b1d17af613f516adfd2
3
- size 10086471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b92c5316ca69ffe851e0f67e7fe45c341b3bec438348b036a48193708f360352
3
+ size 10087026