Update
Browse files- README.md +63 -65
- dev.tsv +0 -0
- loss.tsv +116 -51
- pytorch_model.bin +2 -2
- test.tsv +0 -0
- train.py +54 -0
- training.log +0 -0
README.md
CHANGED
@@ -11,9 +11,9 @@ widget:
|
|
11 |
# POET: A French Extended Part-of-Speech Tagger
|
12 |
|
13 |
- Corpora: [ANTILLES](https://github.com/qanastek/ANTILLES)
|
14 |
-
- Embeddings: [
|
15 |
- Sequence Labelling: [Bi-LSTM-CRF](https://arxiv.org/abs/1011.4088)
|
16 |
-
- Number of Epochs:
|
17 |
|
18 |
**People Involved**
|
19 |
|
@@ -34,7 +34,7 @@ from flair.data import Sentence
|
|
34 |
from flair.models import SequenceTagger
|
35 |
|
36 |
# Load the model
|
37 |
-
model = SequenceTagger.load("qanastek/pos-french
|
38 |
|
39 |
sentence = Sentence("George Washington est allé à Washington")
|
40 |
|
@@ -141,78 +141,76 @@ The test corpora used for this evaluation is available on [Github](https://githu
|
|
141 |
|
142 |
```plain
|
143 |
Results:
|
144 |
-
- F-score (micro) 0.
|
145 |
-
- F-score (macro) 0.
|
146 |
-
- Accuracy 0.
|
147 |
|
148 |
By class:
|
149 |
precision recall f1-score support
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
DETFS 1.0000 1.0000 1.0000 240
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
PPER1S 1.0000 1.0000 1.0000 42
|
185 |
-
SYM 1.0000 0.9474 0.9730 38
|
186 |
-
NOUN 0.8824 0.7692 0.8219 39
|
187 |
-
PRON 1.0000 0.9677 0.9836 31
|
188 |
-
PDEMFS 1.0000 1.0000 1.0000 29
|
189 |
-
VPPMP 0.9286 1.0000 0.9630 26
|
190 |
-
ADJ 0.9524 0.9091 0.9302 22
|
191 |
-
PPER3MP 1.0000 1.0000 1.0000 20
|
192 |
-
VPPFP 1.0000 1.0000 1.0000 19
|
193 |
-
PPER3FS 1.0000 1.0000 1.0000 18
|
194 |
-
MOTINC 0.3333 0.4000 0.3636 15
|
195 |
-
PREFS 1.0000 1.0000 1.0000 10
|
196 |
-
PPOBJMP 1.0000 0.8000 0.8889 10
|
197 |
-
PPOBJFS 0.6250 0.8333 0.7143 6
|
198 |
-
INTJ 0.5000 0.6667 0.5714 6
|
199 |
-
PART 1.0000 1.0000 1.0000 4
|
200 |
-
PDEMMP 1.0000 1.0000 1.0000 3
|
201 |
-
PDEMFP 1.0000 1.0000 1.0000 3
|
202 |
PPER3FP 1.0000 1.0000 1.0000 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
NUM 1.0000 0.3333 0.5000 3
|
204 |
PPER2S 1.0000 1.0000 1.0000 2
|
205 |
-
PPOBJFP
|
206 |
-
|
207 |
-
PINDFS 0.5000 1.0000 0.6667 1
|
208 |
-
PINDMP 1.0000 1.0000 1.0000 1
|
209 |
X 0.0000 0.0000 0.0000 1
|
|
|
210 |
PINDFP 1.0000 1.0000 1.0000 1
|
211 |
|
212 |
-
|
213 |
-
macro avg 0.
|
214 |
-
weighted avg 0.
|
215 |
-
samples avg 0.9797 0.9797 0.9797 10019
|
216 |
```
|
217 |
|
218 |
## BibTeX Citations
|
|
|
11 |
# POET: A French Extended Part-of-Speech Tagger
|
12 |
|
13 |
- Corpora: [ANTILLES](https://github.com/qanastek/ANTILLES)
|
14 |
+
- Embeddings: [FastText](https://fasttext.cc/)
|
15 |
- Sequence Labelling: [Bi-LSTM-CRF](https://arxiv.org/abs/1011.4088)
|
16 |
+
- Number of Epochs: 115
|
17 |
|
18 |
**People Involved**
|
19 |
|
|
|
34 |
from flair.models import SequenceTagger
|
35 |
|
36 |
# Load the model
|
37 |
+
model = SequenceTagger.load("qanastek/pos-french")
|
38 |
|
39 |
sentence = Sentence("George Washington est allé à Washington")
|
40 |
|
|
|
141 |
|
142 |
```plain
|
143 |
Results:
|
144 |
+
- F-score (micro): 0.952
|
145 |
+
- F-score (macro): 0.8644
|
146 |
+
- Accuracy (incl. no class): 0.952
|
147 |
|
148 |
By class:
|
149 |
precision recall f1-score support
|
150 |
+
PPER1S 0.9767 1.0000 0.9882 42
|
151 |
+
VERB 0.9823 0.9537 0.9678 583
|
152 |
+
COSUB 0.9344 0.8906 0.9120 128
|
153 |
+
PUNCT 0.9878 0.9688 0.9782 833
|
154 |
+
PREP 0.9767 0.9879 0.9822 1483
|
155 |
+
PDEMMS 0.9583 0.9200 0.9388 75
|
156 |
+
COCO 0.9839 1.0000 0.9919 245
|
157 |
+
DET 0.9679 0.9814 0.9746 645
|
158 |
+
NMP 0.9521 0.9115 0.9313 305
|
159 |
+
ADJMP 0.8352 0.9268 0.8786 82
|
160 |
+
PREL 0.9324 0.9857 0.9583 70
|
161 |
+
PREFP 0.9767 0.9545 0.9655 44
|
162 |
+
AUX 0.9537 0.9859 0.9695 355
|
163 |
+
ADV 0.9440 0.9365 0.9402 504
|
164 |
+
VPPMP 0.8667 1.0000 0.9286 26
|
165 |
+
DINTMS 0.9919 1.0000 0.9959 122
|
166 |
+
ADJMS 0.9020 0.9057 0.9039 244
|
167 |
+
NMS 0.9226 0.9336 0.9281 753
|
168 |
+
NFS 0.9347 0.9714 0.9527 560
|
169 |
+
YPFOR 0.9806 1.0000 0.9902 353
|
170 |
+
PINDMS 1.0000 0.9091 0.9524 44
|
171 |
+
NOUN 0.8400 0.5385 0.6562 39
|
172 |
+
PROPN 0.8605 0.8278 0.8439 395
|
173 |
+
DETMS 0.9972 0.9972 0.9972 362
|
174 |
+
PPER3MS 0.9341 0.9770 0.9551 87
|
175 |
+
VPPMS 0.8994 0.9682 0.9325 157
|
176 |
DETFS 1.0000 1.0000 1.0000 240
|
177 |
+
ADJFS 0.9266 0.9011 0.9136 182
|
178 |
+
ADJFP 0.9726 0.9342 0.9530 76
|
179 |
+
NFP 0.9463 0.9749 0.9604 199
|
180 |
+
VPPFS 0.8000 0.9000 0.8471 40
|
181 |
+
CHIF 0.9543 0.9414 0.9478 222
|
182 |
+
XFAMIL 0.9346 0.8696 0.9009 115
|
183 |
+
PPER3MP 0.9474 0.9000 0.9231 20
|
184 |
+
PPOBJMS 0.8800 0.9362 0.9072 47
|
185 |
+
PREF 0.8889 0.9231 0.9057 52
|
186 |
+
PPOBJMP 1.0000 0.6000 0.7500 10
|
187 |
+
SYM 0.9706 0.8684 0.9167 38
|
188 |
+
DINTFS 0.9683 1.0000 0.9839 61
|
189 |
+
PDEMFS 1.0000 0.8966 0.9455 29
|
190 |
+
PPER3FS 1.0000 0.9444 0.9714 18
|
191 |
+
VPPFP 0.9500 1.0000 0.9744 19
|
192 |
+
PRON 0.9200 0.7419 0.8214 31
|
193 |
+
PPOBJFS 0.8333 0.8333 0.8333 6
|
194 |
+
PART 0.8000 1.0000 0.8889 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
PPER3FP 1.0000 1.0000 1.0000 2
|
196 |
+
MOTINC 0.3571 0.3333 0.3448 15
|
197 |
+
PDEMMP 1.0000 0.6667 0.8000 3
|
198 |
+
INTJ 0.4000 0.6667 0.5000 6
|
199 |
+
PREFS 1.0000 0.5000 0.6667 10
|
200 |
+
ADJ 0.7917 0.8636 0.8261 22
|
201 |
+
PINDMP 0.0000 0.0000 0.0000 1
|
202 |
+
PINDFS 1.0000 1.0000 1.0000 1
|
203 |
NUM 1.0000 0.3333 0.5000 3
|
204 |
PPER2S 1.0000 1.0000 1.0000 2
|
205 |
+
PPOBJFP 1.0000 0.5000 0.6667 2
|
206 |
+
PDEMFP 1.0000 0.6667 0.8000 3
|
|
|
|
|
207 |
X 0.0000 0.0000 0.0000 1
|
208 |
+
PRELMS 1.0000 1.0000 1.0000 2
|
209 |
PINDFP 1.0000 1.0000 1.0000 1
|
210 |
|
211 |
+
accuracy 0.9520 10019
|
212 |
+
macro avg 0.8956 0.8521 0.8644 10019
|
213 |
+
weighted avg 0.9524 0.9520 0.9515 10019
|
|
|
214 |
```
|
215 |
|
216 |
## BibTeX Citations
|
dev.tsv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
loss.tsv
CHANGED
@@ -1,51 +1,116 @@
|
|
1 |
-
EPOCH TIMESTAMP BAD_EPOCHS LEARNING_RATE TRAIN_LOSS DEV_LOSS
|
2 |
-
1
|
3 |
-
2
|
4 |
-
3
|
5 |
-
4
|
6 |
-
5
|
7 |
-
6 08:
|
8 |
-
7
|
9 |
-
8
|
10 |
-
9
|
11 |
-
10
|
12 |
-
11
|
13 |
-
12
|
14 |
-
13
|
15 |
-
14
|
16 |
-
15
|
17 |
-
16
|
18 |
-
17
|
19 |
-
18
|
20 |
-
19
|
21 |
-
20
|
22 |
-
21
|
23 |
-
22
|
24 |
-
23
|
25 |
-
24
|
26 |
-
25
|
27 |
-
26
|
28 |
-
27
|
29 |
-
28
|
30 |
-
29
|
31 |
-
30
|
32 |
-
31
|
33 |
-
32
|
34 |
-
33
|
35 |
-
34
|
36 |
-
35
|
37 |
-
36
|
38 |
-
37
|
39 |
-
38
|
40 |
-
39
|
41 |
-
40
|
42 |
-
41
|
43 |
-
42
|
44 |
-
43
|
45 |
-
44
|
46 |
-
45
|
47 |
-
46
|
48 |
-
47
|
49 |
-
48
|
50 |
-
49
|
51 |
-
50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EPOCH TIMESTAMP BAD_EPOCHS LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_ACCURACY
|
2 |
+
1 14:02:20 0 0.1000 68.69758622836223 33.562286376953125 0.6341
|
3 |
+
2 14:03:39 0 0.1000 28.112464617838903 14.451913833618164 0.842
|
4 |
+
3 14:04:57 0 0.1000 18.645767338507998 9.90114974975586 0.8889
|
5 |
+
4 14:06:15 0 0.1000 15.248240833788847 7.970705986022949 0.9102
|
6 |
+
5 14:07:32 0 0.1000 13.478108287912555 7.087363243103027 0.9182
|
7 |
+
6 14:08:49 0 0.1000 12.370107963021878 6.438025951385498 0.9242
|
8 |
+
7 14:10:06 0 0.1000 11.615934616696519 5.967233657836914 0.9301
|
9 |
+
8 14:11:23 0 0.1000 10.971208243243462 5.777602195739746 0.9305
|
10 |
+
9 14:12:41 0 0.1000 10.598726795837942 5.393132209777832 0.9343
|
11 |
+
10 14:13:57 1 0.1000 10.203757547699245 5.497661590576172 0.9333
|
12 |
+
11 14:14:57 0 0.1000 9.893801364223513 5.045197486877441 0.9384
|
13 |
+
12 14:16:14 0 0.1000 9.663424407486367 4.956379413604736 0.94
|
14 |
+
13 14:17:34 0 0.1000 9.477814227078868 4.854061126708984 0.9407
|
15 |
+
14 14:18:50 0 0.1000 9.267724408512622 4.6637468338012695 0.9437
|
16 |
+
15 14:20:07 1 0.1000 9.154714643427756 4.723843574523926 0.9425
|
17 |
+
16 14:21:03 0 0.1000 8.99992180714565 4.600645542144775 0.9449
|
18 |
+
17 14:22:23 0 0.1000 8.837092483993125 4.431546211242676 0.9457
|
19 |
+
18 14:23:44 0 0.1000 8.720927441014652 4.406030178070068 0.9458
|
20 |
+
19 14:25:02 0 0.1000 8.62262540580952 4.378823757171631 0.9467
|
21 |
+
20 14:26:19 0 0.1000 8.506264420737207 4.324984550476074 0.9473
|
22 |
+
21 14:27:36 0 0.1000 8.41792546331355 4.271585941314697 0.9478
|
23 |
+
22 14:28:57 0 0.1000 8.313587416589787 4.242696285247803 0.9481
|
24 |
+
23 14:30:23 0 0.1000 8.193923979733897 4.192256927490234 0.9487
|
25 |
+
24 14:31:43 0 0.1000 8.11061666708077 4.094086647033691 0.9502
|
26 |
+
25 14:33:00 1 0.1000 7.994365789194022 4.010470390319824 0.9498
|
27 |
+
26 14:34:00 0 0.1000 8.008020202670478 4.04341983795166 0.9504
|
28 |
+
27 14:35:18 0 0.1000 7.865361327618624 3.950613498687744 0.9514
|
29 |
+
28 14:36:35 1 0.1000 7.784816885416487 3.975137233734131 0.9506
|
30 |
+
29 14:37:32 2 0.1000 7.764582988435188 3.91574764251709 0.9511
|
31 |
+
30 14:38:28 0 0.1000 7.675331065085082 3.923271894454956 0.9516
|
32 |
+
31 14:39:47 0 0.1000 7.6415734966244315 3.890695333480835 0.9531
|
33 |
+
32 14:41:07 1 0.1000 7.5645937328845 3.8633809089660645 0.9521
|
34 |
+
33 14:42:03 0 0.1000 7.523175163606627 3.8121325969696045 0.9532
|
35 |
+
34 14:43:22 1 0.1000 7.426563718677622 3.7572150230407715 0.9526
|
36 |
+
35 14:44:22 0 0.1000 7.3233047375636815 3.7696845531463623 0.9537
|
37 |
+
36 14:45:39 1 0.1000 7.389569210795175 3.7234673500061035 0.953
|
38 |
+
37 14:46:36 2 0.1000 7.281651627700941 3.734175443649292 0.9533
|
39 |
+
38 14:47:32 3 0.1000 7.270007542804279 3.769817352294922 0.9531
|
40 |
+
39 14:48:31 0 0.1000 7.213365968349761 3.6631696224212646 0.9543
|
41 |
+
40 14:49:47 1 0.1000 7.149712216537611 3.682539939880371 0.9543
|
42 |
+
41 14:50:44 2 0.1000 7.1777994527226 3.6655328273773193 0.954
|
43 |
+
42 14:51:39 0 0.1000 7.042381501830785 3.650456428527832 0.9549
|
44 |
+
43 14:52:56 0 0.1000 7.088383231542807 3.5950212478637695 0.9553
|
45 |
+
44 14:54:15 1 0.1000 7.041419814118242 3.6201629638671875 0.955
|
46 |
+
45 14:55:11 2 0.1000 6.940254409756281 3.6683692932128906 0.9547
|
47 |
+
46 14:56:08 3 0.1000 6.920480015003576 3.6241424083709717 0.9546
|
48 |
+
47 14:57:05 0 0.1000 6.890594191255823 3.5482797622680664 0.9559
|
49 |
+
48 14:58:25 1 0.1000 6.855043533629021 3.5812222957611084 0.9553
|
50 |
+
49 14:59:22 2 0.1000 6.800135021716093 3.53963565826416 0.9557
|
51 |
+
50 15:00:18 0 0.1000 6.768724057526715 3.5272233486175537 0.9568
|
52 |
+
51 15:01:36 1 0.1000 6.739383368365533 3.5689055919647217 0.9562
|
53 |
+
52 15:02:33 2 0.1000 6.661376100725832 3.5081849098205566 0.9564
|
54 |
+
53 15:03:30 3 0.1000 6.624162015661729 3.5275826454162598 0.9556
|
55 |
+
54 15:04:27 4 0.1000 6.6407164852176095 3.4983420372009277 0.9567
|
56 |
+
55 15:05:23 0 0.0500 6.55794746685872 3.457058906555176 0.9568
|
57 |
+
56 15:06:41 0 0.0500 6.554000040071201 3.446054458618164 0.9568
|
58 |
+
57 15:08:01 0 0.0500 6.505894108156188 3.4272050857543945 0.9569
|
59 |
+
58 15:09:19 0 0.0500 6.536830239591345 3.4196906089782715 0.9577
|
60 |
+
59 15:10:37 0 0.0500 6.450945086183801 3.4158873558044434 0.958
|
61 |
+
60 15:11:58 1 0.0500 6.467335283228781 3.4086313247680664 0.9578
|
62 |
+
61 15:12:54 2 0.0500 6.395290446492423 3.407853603363037 0.9575
|
63 |
+
62 15:13:50 3 0.0500 6.424222988364971 3.4143223762512207 0.9575
|
64 |
+
63 15:14:46 4 0.0500 6.408897661529811 3.399538516998291 0.9577
|
65 |
+
64 15:15:43 0 0.0250 6.38559970602525 3.399369239807129 0.958
|
66 |
+
65 15:17:01 1 0.0250 6.412552774479959 3.3750159740448 0.9576
|
67 |
+
66 15:17:57 0 0.0250 6.337497213245493 3.3704581260681152 0.9581
|
68 |
+
67 15:19:15 1 0.0250 6.324488298027917 3.38270902633667 0.958
|
69 |
+
68 15:20:12 2 0.0250 6.331536335227764 3.3832640647888184 0.9581
|
70 |
+
69 15:21:08 3 0.0250 6.313072968373256 3.3663582801818848 0.958
|
71 |
+
70 15:22:04 0 0.0250 6.234962488697693 3.3701589107513428 0.9581
|
72 |
+
71 15:23:23 0 0.0250 6.320951318318865 3.3578968048095703 0.9581
|
73 |
+
72 15:24:43 1 0.0250 6.296895212831751 3.358468770980835 0.9579
|
74 |
+
73 15:25:40 2 0.0250 6.242309089255544 3.3546149730682373 0.9579
|
75 |
+
74 15:26:37 0 0.0250 6.253713097192545 3.3542890548706055 0.9582
|
76 |
+
75 15:27:53 0 0.0250 6.227823793360617 3.352653980255127 0.9585
|
77 |
+
76 15:29:09 1 0.0250 6.255534311311435 3.3581032752990723 0.9581
|
78 |
+
77 15:30:06 2 0.0250 6.240572617117283 3.3542392253875732 0.9578
|
79 |
+
78 15:31:03 3 0.0250 6.261245811935019 3.3449277877807617 0.9583
|
80 |
+
79 15:31:59 4 0.0250 6.227426613326621 3.348515510559082 0.9584
|
81 |
+
80 15:32:56 0 0.0125 6.214050103077846 3.3447864055633545 0.9585
|
82 |
+
81 15:34:13 1 0.0125 6.221008515990941 3.3493881225585938 0.9581
|
83 |
+
82 15:35:10 2 0.0125 6.221904636484332 3.3341634273529053 0.9583
|
84 |
+
83 15:36:06 3 0.0125 6.160273648996268 3.338702917098999 0.9583
|
85 |
+
84 15:37:06 4 0.0125 6.138500496349503 3.328439235687256 0.9583
|
86 |
+
85 15:38:03 1 0.0063 6.161158080649587 3.3330752849578857 0.9582
|
87 |
+
86 15:39:00 2 0.0063 6.151211350365022 3.3295114040374756 0.9583
|
88 |
+
87 15:39:57 3 0.0063 6.232518280501914 3.32422137260437 0.9583
|
89 |
+
88 15:40:54 4 0.0063 6.236182183291005 3.3302392959594727 0.9583
|
90 |
+
89 15:41:51 0 0.0031 6.141926377220491 3.324725866317749 0.9585
|
91 |
+
90 15:43:08 1 0.0031 6.166836645750873 3.3286867141723633 0.9583
|
92 |
+
91 15:44:05 2 0.0031 6.13570765689411 3.3274388313293457 0.9582
|
93 |
+
92 15:45:02 3 0.0031 6.138865977261974 3.3267321586608887 0.9583
|
94 |
+
93 15:45:59 4 0.0031 6.161072790095236 3.3274779319763184 0.9583
|
95 |
+
94 15:46:55 1 0.0016 6.144343996469954 3.3262553215026855 0.9583
|
96 |
+
95 15:47:52 2 0.0016 6.172166739944863 3.3275344371795654 0.9583
|
97 |
+
96 15:48:52 3 0.0016 6.184053657329188 3.326869487762451 0.9583
|
98 |
+
97 15:49:49 4 0.0016 6.170179907199556 3.330195903778076 0.9585
|
99 |
+
98 15:50:45 1 0.0008 6.126421856669198 3.3274950981140137 0.9583
|
100 |
+
99 15:51:42 2 0.0008 6.152514440823445 3.3275747299194336 0.9584
|
101 |
+
100 15:52:38 0 0.0008 6.157003947063885 3.327719211578369 0.9586
|
102 |
+
101 15:53:55 1 0.0008 6.169898602814801 3.327259063720703 0.9585
|
103 |
+
102 15:54:52 2 0.0008 6.145160662389435 3.325741767883301 0.9585
|
104 |
+
103 15:55:48 0 0.0008 6.15242785901095 3.325845241546631 0.9586
|
105 |
+
104 15:57:06 1 0.0008 6.170637012582965 3.3250930309295654 0.9585
|
106 |
+
105 15:58:03 2 0.0008 6.101018842342681 3.32464861869812 0.9585
|
107 |
+
106 15:58:59 3 0.0008 6.18395307844719 3.3257997035980225 0.9585
|
108 |
+
107 15:59:56 4 0.0008 6.165698562048178 3.3272788524627686 0.9585
|
109 |
+
108 16:00:52 1 0.0004 6.123404840452481 3.32755184173584 0.9586
|
110 |
+
109 16:01:52 2 0.0004 6.168317748382028 3.3270320892333984 0.9585
|
111 |
+
110 16:02:49 3 0.0004 6.15737580831072 3.3262462615966797 0.9584
|
112 |
+
111 16:03:45 4 0.0004 6.181477074074534 3.325368881225586 0.9584
|
113 |
+
112 16:04:42 1 0.0002 6.151819916953028 3.3254475593566895 0.9585
|
114 |
+
113 16:05:39 2 0.0002 6.152685663341421 3.325159788131714 0.9585
|
115 |
+
114 16:06:36 3 0.0002 6.114718158688166 3.325321912765503 0.9584
|
116 |
+
115 16:07:33 4 0.0002 6.157736689643523 3.3252782821655273 0.9585
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58943a5be6d663da905aa8238e84d839aaef209dfec8ce716ed1ffe5e137cdee
|
3 |
+
size 1245112805
|
test.tsv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
train.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import argparse
|
3 |
+
from datetime import datetime
|
4 |
+
|
5 |
+
from flair.data import Corpus
|
6 |
+
from flair.models import SequenceTagger
|
7 |
+
from flair.trainers import ModelTrainer
|
8 |
+
from flair.datasets import UniversalDependenciesCorpus
|
9 |
+
from flair.embeddings import WordEmbeddings, StackedEmbeddings
|
10 |
+
|
11 |
+
parser = argparse.ArgumentParser(description='Flair Training Part-of-speech tagging')
|
12 |
+
parser.add_argument('-output', type=str, default="models/", help='The output directory')
|
13 |
+
parser.add_argument('-epochs', type=int, default=1, help='Number of Epochs')
|
14 |
+
args = parser.parse_args()
|
15 |
+
|
16 |
+
output = os.path.join(args.output, "UPOS_UD_FRENCH_PLUS_" + str(args.epochs) + "_" + datetime.today().strftime('%Y-%m-%d-%H:%M:%S'))
|
17 |
+
print(output)
|
18 |
+
|
19 |
+
# corpus: Corpus = UD_FRENCH()
|
20 |
+
corpus: Corpus = UniversalDependenciesCorpus(
|
21 |
+
data_folder='UD_FRENCH_PLUS',
|
22 |
+
train_file="fr_gsd-ud-train.conllu",
|
23 |
+
test_file="fr_gsd-ud-test.conllu",
|
24 |
+
dev_file="fr_gsd-ud-dev.conllu",
|
25 |
+
)
|
26 |
+
# print(corpus)
|
27 |
+
|
28 |
+
tag_type = 'upos'
|
29 |
+
|
30 |
+
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
|
31 |
+
# print(tag_dictionary)
|
32 |
+
|
33 |
+
embedding_types = [
|
34 |
+
WordEmbeddings('fr'),
|
35 |
+
]
|
36 |
+
|
37 |
+
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)
|
38 |
+
|
39 |
+
tagger: SequenceTagger = SequenceTagger(
|
40 |
+
hidden_size=256,
|
41 |
+
embeddings=embeddings,
|
42 |
+
tag_dictionary=tag_dictionary,
|
43 |
+
tag_type=tag_type,
|
44 |
+
use_crf=True
|
45 |
+
)
|
46 |
+
|
47 |
+
trainer: ModelTrainer = ModelTrainer(tagger, corpus)
|
48 |
+
|
49 |
+
trainer.train(
|
50 |
+
output,
|
51 |
+
learning_rate=0.1,
|
52 |
+
mini_batch_size=128,
|
53 |
+
max_epochs=args.epochs
|
54 |
+
)
|
training.log
CHANGED
The diff for this file is too large to render.
See raw diff
|
|