Upload folder using huggingface_hub
Browse files- best-model.pt +3 -0
- dev.tsv +0 -0
- final-model.pt +3 -0
- loss.tsv +11 -0
- runs/events.out.tfevents.1697051631.de2e83fddbee.1120.15 +3 -0
- test.tsv +0 -0
- training.log +262 -0
best-model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ee222b65d920e066e8951701644b5a8b6e5af0a666ca6ad59a676b63bc381aa
|
3 |
+
size 870817519
|
dev.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
final-model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f70efc15951fad2a7523182ef97ed1f0afc31555f88fa7b994c8d984597fdbd
|
3 |
+
size 870817636
|
loss.tsv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
2 |
+
1 19:23:16 0.0002 1.0556 0.1911 0.5299 0.5673 0.5480 0.4084
|
3 |
+
2 19:32:49 0.0001 0.1363 0.1130 0.7587 0.7701 0.7643 0.6388
|
4 |
+
3 19:42:37 0.0001 0.0744 0.1316 0.7564 0.8027 0.7789 0.6527
|
5 |
+
4 19:52:18 0.0001 0.0520 0.1415 0.7468 0.7986 0.7719 0.6458
|
6 |
+
5 20:02:02 0.0001 0.0370 0.1544 0.7927 0.8218 0.8069 0.6911
|
7 |
+
6 20:12:15 0.0001 0.0276 0.1777 0.7854 0.8218 0.8032 0.6825
|
8 |
+
7 20:21:52 0.0001 0.0200 0.1921 0.7843 0.8163 0.8000 0.6826
|
9 |
+
8 20:31:20 0.0000 0.0143 0.2037 0.7859 0.8190 0.8021 0.6825
|
10 |
+
9 20:40:59 0.0000 0.0098 0.2125 0.7859 0.8190 0.8021 0.6833
|
11 |
+
10 20:50:26 0.0000 0.0075 0.2191 0.7918 0.8177 0.8046 0.6861
|
runs/events.out.tfevents.1697051631.de2e83fddbee.1120.15
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7231acda1b3d75232cdad283ba648d7568473b4ac886f16e4ed3cd4e8b1349f6
|
3 |
+
size 999862
|
test.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training.log
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-10-11 19:13:51,177 ----------------------------------------------------------------------------------------------------
|
2 |
+
2023-10-11 19:13:51,179 Model: "SequenceTagger(
|
3 |
+
(embeddings): ByT5Embeddings(
|
4 |
+
(model): T5EncoderModel(
|
5 |
+
(shared): Embedding(384, 1472)
|
6 |
+
(encoder): T5Stack(
|
7 |
+
(embed_tokens): Embedding(384, 1472)
|
8 |
+
(block): ModuleList(
|
9 |
+
(0): T5Block(
|
10 |
+
(layer): ModuleList(
|
11 |
+
(0): T5LayerSelfAttention(
|
12 |
+
(SelfAttention): T5Attention(
|
13 |
+
(q): Linear(in_features=1472, out_features=384, bias=False)
|
14 |
+
(k): Linear(in_features=1472, out_features=384, bias=False)
|
15 |
+
(v): Linear(in_features=1472, out_features=384, bias=False)
|
16 |
+
(o): Linear(in_features=384, out_features=1472, bias=False)
|
17 |
+
(relative_attention_bias): Embedding(32, 6)
|
18 |
+
)
|
19 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
20 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
21 |
+
)
|
22 |
+
(1): T5LayerFF(
|
23 |
+
(DenseReluDense): T5DenseGatedActDense(
|
24 |
+
(wi_0): Linear(in_features=1472, out_features=3584, bias=False)
|
25 |
+
(wi_1): Linear(in_features=1472, out_features=3584, bias=False)
|
26 |
+
(wo): Linear(in_features=3584, out_features=1472, bias=False)
|
27 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
28 |
+
(act): NewGELUActivation()
|
29 |
+
)
|
30 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
31 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
32 |
+
)
|
33 |
+
)
|
34 |
+
)
|
35 |
+
(1-11): 11 x T5Block(
|
36 |
+
(layer): ModuleList(
|
37 |
+
(0): T5LayerSelfAttention(
|
38 |
+
(SelfAttention): T5Attention(
|
39 |
+
(q): Linear(in_features=1472, out_features=384, bias=False)
|
40 |
+
(k): Linear(in_features=1472, out_features=384, bias=False)
|
41 |
+
(v): Linear(in_features=1472, out_features=384, bias=False)
|
42 |
+
(o): Linear(in_features=384, out_features=1472, bias=False)
|
43 |
+
)
|
44 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
45 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
46 |
+
)
|
47 |
+
(1): T5LayerFF(
|
48 |
+
(DenseReluDense): T5DenseGatedActDense(
|
49 |
+
(wi_0): Linear(in_features=1472, out_features=3584, bias=False)
|
50 |
+
(wi_1): Linear(in_features=1472, out_features=3584, bias=False)
|
51 |
+
(wo): Linear(in_features=3584, out_features=1472, bias=False)
|
52 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
53 |
+
(act): NewGELUActivation()
|
54 |
+
)
|
55 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
56 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
57 |
+
)
|
58 |
+
)
|
59 |
+
)
|
60 |
+
)
|
61 |
+
(final_layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
62 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
63 |
+
)
|
64 |
+
)
|
65 |
+
)
|
66 |
+
(locked_dropout): LockedDropout(p=0.5)
|
67 |
+
(linear): Linear(in_features=1472, out_features=17, bias=True)
|
68 |
+
(loss_function): CrossEntropyLoss()
|
69 |
+
)"
|
70 |
+
2023-10-11 19:13:51,179 ----------------------------------------------------------------------------------------------------
|
71 |
+
2023-10-11 19:13:51,180 MultiCorpus: 7142 train + 698 dev + 2570 test sentences
|
72 |
+
- NER_HIPE_2022 Corpus: 7142 train + 698 dev + 2570 test sentences - /root/.flair/datasets/ner_hipe_2022/v2.1/newseye/fr/with_doc_seperator
|
73 |
+
2023-10-11 19:13:51,180 ----------------------------------------------------------------------------------------------------
|
74 |
+
2023-10-11 19:13:51,180 Train: 7142 sentences
|
75 |
+
2023-10-11 19:13:51,180 (train_with_dev=False, train_with_test=False)
|
76 |
+
2023-10-11 19:13:51,180 ----------------------------------------------------------------------------------------------------
|
77 |
+
2023-10-11 19:13:51,180 Training Params:
|
78 |
+
2023-10-11 19:13:51,180 - learning_rate: "0.00016"
|
79 |
+
2023-10-11 19:13:51,180 - mini_batch_size: "4"
|
80 |
+
2023-10-11 19:13:51,180 - max_epochs: "10"
|
81 |
+
2023-10-11 19:13:51,180 - shuffle: "True"
|
82 |
+
2023-10-11 19:13:51,180 ----------------------------------------------------------------------------------------------------
|
83 |
+
2023-10-11 19:13:51,180 Plugins:
|
84 |
+
2023-10-11 19:13:51,180 - TensorboardLogger
|
85 |
+
2023-10-11 19:13:51,180 - LinearScheduler | warmup_fraction: '0.1'
|
86 |
+
2023-10-11 19:13:51,181 ----------------------------------------------------------------------------------------------------
|
87 |
+
2023-10-11 19:13:51,181 Final evaluation on model from best epoch (best-model.pt)
|
88 |
+
2023-10-11 19:13:51,181 - metric: "('micro avg', 'f1-score')"
|
89 |
+
2023-10-11 19:13:51,181 ----------------------------------------------------------------------------------------------------
|
90 |
+
2023-10-11 19:13:51,181 Computation:
|
91 |
+
2023-10-11 19:13:51,181 - compute on device: cuda:0
|
92 |
+
2023-10-11 19:13:51,181 - embedding storage: none
|
93 |
+
2023-10-11 19:13:51,181 ----------------------------------------------------------------------------------------------------
|
94 |
+
2023-10-11 19:13:51,181 Model training base path: "hmbench-newseye/fr-hmbyt5-preliminary/byt5-small-historic-multilingual-span20-flax-bs4-wsFalse-e10-lr0.00016-poolingfirst-layers-1-crfFalse-4"
|
95 |
+
2023-10-11 19:13:51,181 ----------------------------------------------------------------------------------------------------
|
96 |
+
2023-10-11 19:13:51,181 ----------------------------------------------------------------------------------------------------
|
97 |
+
2023-10-11 19:13:51,181 Logging anything other than scalars to TensorBoard is currently not supported.
|
98 |
+
2023-10-11 19:14:45,792 epoch 1 - iter 178/1786 - loss 2.81726663 - time (sec): 54.61 - samples/sec: 427.42 - lr: 0.000016 - momentum: 0.000000
|
99 |
+
2023-10-11 19:15:39,272 epoch 1 - iter 356/1786 - loss 2.63354128 - time (sec): 108.09 - samples/sec: 443.28 - lr: 0.000032 - momentum: 0.000000
|
100 |
+
2023-10-11 19:16:33,216 epoch 1 - iter 534/1786 - loss 2.35130578 - time (sec): 162.03 - samples/sec: 444.80 - lr: 0.000048 - momentum: 0.000000
|
101 |
+
2023-10-11 19:17:26,272 epoch 1 - iter 712/1786 - loss 2.04487747 - time (sec): 215.09 - samples/sec: 450.33 - lr: 0.000064 - momentum: 0.000000
|
102 |
+
2023-10-11 19:18:17,461 epoch 1 - iter 890/1786 - loss 1.77647319 - time (sec): 266.28 - samples/sec: 451.84 - lr: 0.000080 - momentum: 0.000000
|
103 |
+
2023-10-11 19:19:13,643 epoch 1 - iter 1068/1786 - loss 1.55935737 - time (sec): 322.46 - samples/sec: 453.84 - lr: 0.000096 - momentum: 0.000000
|
104 |
+
2023-10-11 19:20:09,974 epoch 1 - iter 1246/1786 - loss 1.37945532 - time (sec): 378.79 - samples/sec: 456.82 - lr: 0.000112 - momentum: 0.000000
|
105 |
+
2023-10-11 19:21:04,833 epoch 1 - iter 1424/1786 - loss 1.25092114 - time (sec): 433.65 - samples/sec: 456.93 - lr: 0.000127 - momentum: 0.000000
|
106 |
+
2023-10-11 19:21:59,519 epoch 1 - iter 1602/1786 - loss 1.14992315 - time (sec): 488.34 - samples/sec: 455.35 - lr: 0.000143 - momentum: 0.000000
|
107 |
+
2023-10-11 19:22:54,887 epoch 1 - iter 1780/1786 - loss 1.05807649 - time (sec): 543.70 - samples/sec: 456.19 - lr: 0.000159 - momentum: 0.000000
|
108 |
+
2023-10-11 19:22:56,573 ----------------------------------------------------------------------------------------------------
|
109 |
+
2023-10-11 19:22:56,574 EPOCH 1 done: loss 1.0556 - lr: 0.000159
|
110 |
+
2023-10-11 19:23:16,475 DEV : loss 0.1910761147737503 - f1-score (micro avg) 0.548
|
111 |
+
2023-10-11 19:23:16,504 saving best model
|
112 |
+
2023-10-11 19:23:17,476 ----------------------------------------------------------------------------------------------------
|
113 |
+
2023-10-11 19:24:13,401 epoch 2 - iter 178/1786 - loss 0.19843727 - time (sec): 55.92 - samples/sec: 460.08 - lr: 0.000158 - momentum: 0.000000
|
114 |
+
2023-10-11 19:25:09,123 epoch 2 - iter 356/1786 - loss 0.19088934 - time (sec): 111.64 - samples/sec: 455.94 - lr: 0.000156 - momentum: 0.000000
|
115 |
+
2023-10-11 19:26:07,078 epoch 2 - iter 534/1786 - loss 0.17857709 - time (sec): 169.60 - samples/sec: 457.11 - lr: 0.000155 - momentum: 0.000000
|
116 |
+
2023-10-11 19:27:01,067 epoch 2 - iter 712/1786 - loss 0.17027634 - time (sec): 223.59 - samples/sec: 450.87 - lr: 0.000153 - momentum: 0.000000
|
117 |
+
2023-10-11 19:27:55,997 epoch 2 - iter 890/1786 - loss 0.16174542 - time (sec): 278.52 - samples/sec: 450.27 - lr: 0.000151 - momentum: 0.000000
|
118 |
+
2023-10-11 19:28:49,544 epoch 2 - iter 1068/1786 - loss 0.15308871 - time (sec): 332.07 - samples/sec: 451.00 - lr: 0.000149 - momentum: 0.000000
|
119 |
+
2023-10-11 19:29:43,254 epoch 2 - iter 1246/1786 - loss 0.14946536 - time (sec): 385.78 - samples/sec: 450.66 - lr: 0.000148 - momentum: 0.000000
|
120 |
+
2023-10-11 19:30:37,656 epoch 2 - iter 1424/1786 - loss 0.14427293 - time (sec): 440.18 - samples/sec: 450.49 - lr: 0.000146 - momentum: 0.000000
|
121 |
+
2023-10-11 19:31:31,781 epoch 2 - iter 1602/1786 - loss 0.13998247 - time (sec): 494.30 - samples/sec: 448.65 - lr: 0.000144 - momentum: 0.000000
|
122 |
+
2023-10-11 19:32:26,177 epoch 2 - iter 1780/1786 - loss 0.13658975 - time (sec): 548.70 - samples/sec: 451.31 - lr: 0.000142 - momentum: 0.000000
|
123 |
+
2023-10-11 19:32:28,111 ----------------------------------------------------------------------------------------------------
|
124 |
+
2023-10-11 19:32:28,112 EPOCH 2 done: loss 0.1363 - lr: 0.000142
|
125 |
+
2023-10-11 19:32:49,373 DEV : loss 0.11301875859498978 - f1-score (micro avg) 0.7643
|
126 |
+
2023-10-11 19:32:49,404 saving best model
|
127 |
+
2023-10-11 19:32:54,507 ----------------------------------------------------------------------------------------------------
|
128 |
+
2023-10-11 19:33:49,062 epoch 3 - iter 178/1786 - loss 0.07443452 - time (sec): 54.55 - samples/sec: 454.66 - lr: 0.000140 - momentum: 0.000000
|
129 |
+
2023-10-11 19:34:45,096 epoch 3 - iter 356/1786 - loss 0.07792617 - time (sec): 110.58 - samples/sec: 451.54 - lr: 0.000139 - momentum: 0.000000
|
130 |
+
2023-10-11 19:35:39,332 epoch 3 - iter 534/1786 - loss 0.07368656 - time (sec): 164.82 - samples/sec: 449.84 - lr: 0.000137 - momentum: 0.000000
|
131 |
+
2023-10-11 19:36:35,943 epoch 3 - iter 712/1786 - loss 0.07163510 - time (sec): 221.43 - samples/sec: 443.25 - lr: 0.000135 - momentum: 0.000000
|
132 |
+
2023-10-11 19:37:33,213 epoch 3 - iter 890/1786 - loss 0.07448758 - time (sec): 278.70 - samples/sec: 440.47 - lr: 0.000133 - momentum: 0.000000
|
133 |
+
2023-10-11 19:38:28,862 epoch 3 - iter 1068/1786 - loss 0.07522810 - time (sec): 334.35 - samples/sec: 441.07 - lr: 0.000132 - momentum: 0.000000
|
134 |
+
2023-10-11 19:39:24,154 epoch 3 - iter 1246/1786 - loss 0.07393748 - time (sec): 389.64 - samples/sec: 441.29 - lr: 0.000130 - momentum: 0.000000
|
135 |
+
2023-10-11 19:40:21,470 epoch 3 - iter 1424/1786 - loss 0.07497568 - time (sec): 446.96 - samples/sec: 441.14 - lr: 0.000128 - momentum: 0.000000
|
136 |
+
2023-10-11 19:41:17,975 epoch 3 - iter 1602/1786 - loss 0.07401065 - time (sec): 503.46 - samples/sec: 442.08 - lr: 0.000126 - momentum: 0.000000
|
137 |
+
2023-10-11 19:42:14,212 epoch 3 - iter 1780/1786 - loss 0.07443437 - time (sec): 559.70 - samples/sec: 442.67 - lr: 0.000125 - momentum: 0.000000
|
138 |
+
2023-10-11 19:42:16,107 ----------------------------------------------------------------------------------------------------
|
139 |
+
2023-10-11 19:42:16,108 EPOCH 3 done: loss 0.0744 - lr: 0.000125
|
140 |
+
2023-10-11 19:42:37,828 DEV : loss 0.13155920803546906 - f1-score (micro avg) 0.7789
|
141 |
+
2023-10-11 19:42:37,858 saving best model
|
142 |
+
2023-10-11 19:42:48,486 ----------------------------------------------------------------------------------------------------
|
143 |
+
2023-10-11 19:43:42,662 epoch 4 - iter 178/1786 - loss 0.05682850 - time (sec): 54.17 - samples/sec: 455.12 - lr: 0.000123 - momentum: 0.000000
|
144 |
+
2023-10-11 19:44:36,229 epoch 4 - iter 356/1786 - loss 0.05085291 - time (sec): 107.74 - samples/sec: 461.16 - lr: 0.000121 - momentum: 0.000000
|
145 |
+
2023-10-11 19:45:31,139 epoch 4 - iter 534/1786 - loss 0.05380353 - time (sec): 162.65 - samples/sec: 467.00 - lr: 0.000119 - momentum: 0.000000
|
146 |
+
2023-10-11 19:46:27,225 epoch 4 - iter 712/1786 - loss 0.05409746 - time (sec): 218.73 - samples/sec: 460.15 - lr: 0.000117 - momentum: 0.000000
|
147 |
+
2023-10-11 19:47:21,874 epoch 4 - iter 890/1786 - loss 0.05254482 - time (sec): 273.38 - samples/sec: 456.64 - lr: 0.000116 - momentum: 0.000000
|
148 |
+
2023-10-11 19:48:17,463 epoch 4 - iter 1068/1786 - loss 0.05090637 - time (sec): 328.97 - samples/sec: 455.59 - lr: 0.000114 - momentum: 0.000000
|
149 |
+
2023-10-11 19:49:12,569 epoch 4 - iter 1246/1786 - loss 0.05176189 - time (sec): 384.08 - samples/sec: 459.11 - lr: 0.000112 - momentum: 0.000000
|
150 |
+
2023-10-11 19:50:05,052 epoch 4 - iter 1424/1786 - loss 0.05266426 - time (sec): 436.56 - samples/sec: 457.32 - lr: 0.000110 - momentum: 0.000000
|
151 |
+
2023-10-11 19:50:59,830 epoch 4 - iter 1602/1786 - loss 0.05268196 - time (sec): 491.34 - samples/sec: 455.32 - lr: 0.000109 - momentum: 0.000000
|
152 |
+
2023-10-11 19:51:54,157 epoch 4 - iter 1780/1786 - loss 0.05210017 - time (sec): 545.67 - samples/sec: 454.53 - lr: 0.000107 - momentum: 0.000000
|
153 |
+
2023-10-11 19:51:55,885 ----------------------------------------------------------------------------------------------------
|
154 |
+
2023-10-11 19:51:55,885 EPOCH 4 done: loss 0.0520 - lr: 0.000107
|
155 |
+
2023-10-11 19:52:18,869 DEV : loss 0.14153534173965454 - f1-score (micro avg) 0.7719
|
156 |
+
2023-10-11 19:52:18,903 ----------------------------------------------------------------------------------------------------
|
157 |
+
2023-10-11 19:53:12,247 epoch 5 - iter 178/1786 - loss 0.03543846 - time (sec): 53.34 - samples/sec: 445.73 - lr: 0.000105 - momentum: 0.000000
|
158 |
+
2023-10-11 19:54:04,362 epoch 5 - iter 356/1786 - loss 0.03568755 - time (sec): 105.46 - samples/sec: 444.09 - lr: 0.000103 - momentum: 0.000000
|
159 |
+
2023-10-11 19:54:59,829 epoch 5 - iter 534/1786 - loss 0.03544231 - time (sec): 160.92 - samples/sec: 456.17 - lr: 0.000101 - momentum: 0.000000
|
160 |
+
2023-10-11 19:55:58,462 epoch 5 - iter 712/1786 - loss 0.03608462 - time (sec): 219.56 - samples/sec: 450.03 - lr: 0.000100 - momentum: 0.000000
|
161 |
+
2023-10-11 19:56:56,006 epoch 5 - iter 890/1786 - loss 0.03631887 - time (sec): 277.10 - samples/sec: 443.92 - lr: 0.000098 - momentum: 0.000000
|
162 |
+
2023-10-11 19:57:51,595 epoch 5 - iter 1068/1786 - loss 0.03584506 - time (sec): 332.69 - samples/sec: 440.11 - lr: 0.000096 - momentum: 0.000000
|
163 |
+
2023-10-11 19:58:52,566 epoch 5 - iter 1246/1786 - loss 0.03555371 - time (sec): 393.66 - samples/sec: 438.69 - lr: 0.000094 - momentum: 0.000000
|
164 |
+
2023-10-11 19:59:47,070 epoch 5 - iter 1424/1786 - loss 0.03537355 - time (sec): 448.16 - samples/sec: 438.91 - lr: 0.000093 - momentum: 0.000000
|
165 |
+
2023-10-11 20:00:42,523 epoch 5 - iter 1602/1786 - loss 0.03662827 - time (sec): 503.62 - samples/sec: 441.12 - lr: 0.000091 - momentum: 0.000000
|
166 |
+
2023-10-11 20:01:38,743 epoch 5 - iter 1780/1786 - loss 0.03694673 - time (sec): 559.84 - samples/sec: 442.55 - lr: 0.000089 - momentum: 0.000000
|
167 |
+
2023-10-11 20:01:40,649 ----------------------------------------------------------------------------------------------------
|
168 |
+
2023-10-11 20:01:40,650 EPOCH 5 done: loss 0.0370 - lr: 0.000089
|
169 |
+
2023-10-11 20:02:02,932 DEV : loss 0.1543937772512436 - f1-score (micro avg) 0.8069
|
170 |
+
2023-10-11 20:02:02,963 saving best model
|
171 |
+
2023-10-11 20:02:34,808 ----------------------------------------------------------------------------------------------------
|
172 |
+
2023-10-11 20:03:31,794 epoch 6 - iter 178/1786 - loss 0.03407214 - time (sec): 56.98 - samples/sec: 440.90 - lr: 0.000087 - momentum: 0.000000
|
173 |
+
2023-10-11 20:04:27,201 epoch 6 - iter 356/1786 - loss 0.02838239 - time (sec): 112.39 - samples/sec: 441.41 - lr: 0.000085 - momentum: 0.000000
|
174 |
+
2023-10-11 20:05:22,131 epoch 6 - iter 534/1786 - loss 0.02705824 - time (sec): 167.32 - samples/sec: 440.23 - lr: 0.000084 - momentum: 0.000000
|
175 |
+
2023-10-11 20:06:17,967 epoch 6 - iter 712/1786 - loss 0.02835522 - time (sec): 223.15 - samples/sec: 442.11 - lr: 0.000082 - momentum: 0.000000
|
176 |
+
2023-10-11 20:07:13,247 epoch 6 - iter 890/1786 - loss 0.02711076 - time (sec): 278.43 - samples/sec: 441.74 - lr: 0.000080 - momentum: 0.000000
|
177 |
+
2023-10-11 20:08:09,742 epoch 6 - iter 1068/1786 - loss 0.02894506 - time (sec): 334.93 - samples/sec: 443.02 - lr: 0.000078 - momentum: 0.000000
|
178 |
+
2023-10-11 20:09:04,988 epoch 6 - iter 1246/1786 - loss 0.02842164 - time (sec): 390.18 - samples/sec: 444.19 - lr: 0.000077 - momentum: 0.000000
|
179 |
+
2023-10-11 20:10:01,812 epoch 6 - iter 1424/1786 - loss 0.02862201 - time (sec): 447.00 - samples/sec: 443.26 - lr: 0.000075 - momentum: 0.000000
|
180 |
+
2023-10-11 20:10:58,162 epoch 6 - iter 1602/1786 - loss 0.02770039 - time (sec): 503.35 - samples/sec: 442.70 - lr: 0.000073 - momentum: 0.000000
|
181 |
+
2023-10-11 20:11:52,537 epoch 6 - iter 1780/1786 - loss 0.02749648 - time (sec): 557.72 - samples/sec: 444.60 - lr: 0.000071 - momentum: 0.000000
|
182 |
+
2023-10-11 20:11:54,275 ----------------------------------------------------------------------------------------------------
|
183 |
+
2023-10-11 20:11:54,275 EPOCH 6 done: loss 0.0276 - lr: 0.000071
|
184 |
+
2023-10-11 20:12:15,480 DEV : loss 0.17768503725528717 - f1-score (micro avg) 0.8032
|
185 |
+
2023-10-11 20:12:15,511 ----------------------------------------------------------------------------------------------------
|
186 |
+
2023-10-11 20:13:10,371 epoch 7 - iter 178/1786 - loss 0.02194253 - time (sec): 54.86 - samples/sec: 440.84 - lr: 0.000069 - momentum: 0.000000
|
187 |
+
2023-10-11 20:14:05,705 epoch 7 - iter 356/1786 - loss 0.02239136 - time (sec): 110.19 - samples/sec: 442.59 - lr: 0.000068 - momentum: 0.000000
|
188 |
+
2023-10-11 20:15:03,230 epoch 7 - iter 534/1786 - loss 0.02455740 - time (sec): 167.72 - samples/sec: 434.51 - lr: 0.000066 - momentum: 0.000000
|
189 |
+
2023-10-11 20:15:58,719 epoch 7 - iter 712/1786 - loss 0.02161452 - time (sec): 223.21 - samples/sec: 441.61 - lr: 0.000064 - momentum: 0.000000
|
190 |
+
2023-10-11 20:16:55,364 epoch 7 - iter 890/1786 - loss 0.02185857 - time (sec): 279.85 - samples/sec: 442.72 - lr: 0.000062 - momentum: 0.000000
|
191 |
+
2023-10-11 20:17:53,035 epoch 7 - iter 1068/1786 - loss 0.02033916 - time (sec): 337.52 - samples/sec: 440.31 - lr: 0.000061 - momentum: 0.000000
|
192 |
+
2023-10-11 20:18:46,607 epoch 7 - iter 1246/1786 - loss 0.02021475 - time (sec): 391.09 - samples/sec: 442.63 - lr: 0.000059 - momentum: 0.000000
|
193 |
+
2023-10-11 20:19:41,538 epoch 7 - iter 1424/1786 - loss 0.02040982 - time (sec): 446.02 - samples/sec: 445.41 - lr: 0.000057 - momentum: 0.000000
|
194 |
+
2023-10-11 20:20:36,433 epoch 7 - iter 1602/1786 - loss 0.02004331 - time (sec): 500.92 - samples/sec: 446.50 - lr: 0.000055 - momentum: 0.000000
|
195 |
+
2023-10-11 20:21:30,020 epoch 7 - iter 1780/1786 - loss 0.02009444 - time (sec): 554.51 - samples/sec: 446.57 - lr: 0.000053 - momentum: 0.000000
|
196 |
+
2023-10-11 20:21:31,879 ----------------------------------------------------------------------------------------------------
|
197 |
+
2023-10-11 20:21:31,879 EPOCH 7 done: loss 0.0200 - lr: 0.000053
|
198 |
+
2023-10-11 20:21:52,842 DEV : loss 0.19214080274105072 - f1-score (micro avg) 0.8
|
199 |
+
2023-10-11 20:21:52,871 ----------------------------------------------------------------------------------------------------
|
200 |
+
2023-10-11 20:22:46,267 epoch 8 - iter 178/1786 - loss 0.01515433 - time (sec): 53.39 - samples/sec: 462.82 - lr: 0.000052 - momentum: 0.000000
|
201 |
+
2023-10-11 20:23:40,414 epoch 8 - iter 356/1786 - loss 0.01607011 - time (sec): 107.54 - samples/sec: 469.13 - lr: 0.000050 - momentum: 0.000000
|
202 |
+
2023-10-11 20:24:34,115 epoch 8 - iter 534/1786 - loss 0.01419008 - time (sec): 161.24 - samples/sec: 467.30 - lr: 0.000048 - momentum: 0.000000
|
203 |
+
2023-10-11 20:25:28,581 epoch 8 - iter 712/1786 - loss 0.01574404 - time (sec): 215.71 - samples/sec: 465.31 - lr: 0.000046 - momentum: 0.000000
|
204 |
+
2023-10-11 20:26:22,156 epoch 8 - iter 890/1786 - loss 0.01486595 - time (sec): 269.28 - samples/sec: 461.55 - lr: 0.000044 - momentum: 0.000000
|
205 |
+
2023-10-11 20:27:16,509 epoch 8 - iter 1068/1786 - loss 0.01436697 - time (sec): 323.64 - samples/sec: 459.27 - lr: 0.000043 - momentum: 0.000000
|
206 |
+
2023-10-11 20:28:11,431 epoch 8 - iter 1246/1786 - loss 0.01439559 - time (sec): 378.56 - samples/sec: 459.92 - lr: 0.000041 - momentum: 0.000000
|
207 |
+
2023-10-11 20:29:04,809 epoch 8 - iter 1424/1786 - loss 0.01444281 - time (sec): 431.94 - samples/sec: 455.36 - lr: 0.000039 - momentum: 0.000000
|
208 |
+
2023-10-11 20:30:00,610 epoch 8 - iter 1602/1786 - loss 0.01433488 - time (sec): 487.74 - samples/sec: 456.40 - lr: 0.000037 - momentum: 0.000000
|
209 |
+
2023-10-11 20:30:56,858 epoch 8 - iter 1780/1786 - loss 0.01428435 - time (sec): 543.99 - samples/sec: 456.02 - lr: 0.000036 - momentum: 0.000000
|
210 |
+
2023-10-11 20:30:58,578 ----------------------------------------------------------------------------------------------------
|
211 |
+
2023-10-11 20:30:58,579 EPOCH 8 done: loss 0.0143 - lr: 0.000036
|
212 |
+
2023-10-11 20:31:20,849 DEV : loss 0.20372258126735687 - f1-score (micro avg) 0.8021
|
213 |
+
2023-10-11 20:31:20,881 ----------------------------------------------------------------------------------------------------
|
214 |
+
2023-10-11 20:32:16,935 epoch 9 - iter 178/1786 - loss 0.00793109 - time (sec): 56.05 - samples/sec: 422.02 - lr: 0.000034 - momentum: 0.000000
|
215 |
+
2023-10-11 20:33:14,012 epoch 9 - iter 356/1786 - loss 0.01077629 - time (sec): 113.13 - samples/sec: 436.99 - lr: 0.000032 - momentum: 0.000000
|
216 |
+
2023-10-11 20:34:11,690 epoch 9 - iter 534/1786 - loss 0.01120412 - time (sec): 170.81 - samples/sec: 443.56 - lr: 0.000030 - momentum: 0.000000
|
217 |
+
2023-10-11 20:35:08,258 epoch 9 - iter 712/1786 - loss 0.00993767 - time (sec): 227.38 - samples/sec: 442.48 - lr: 0.000028 - momentum: 0.000000
|
218 |
+
2023-10-11 20:36:02,018 epoch 9 - iter 890/1786 - loss 0.00981936 - time (sec): 281.14 - samples/sec: 443.78 - lr: 0.000027 - momentum: 0.000000
|
219 |
+
2023-10-11 20:36:57,308 epoch 9 - iter 1068/1786 - loss 0.00964522 - time (sec): 336.42 - samples/sec: 445.06 - lr: 0.000025 - momentum: 0.000000
|
220 |
+
2023-10-11 20:37:51,844 epoch 9 - iter 1246/1786 - loss 0.00967314 - time (sec): 390.96 - samples/sec: 444.94 - lr: 0.000023 - momentum: 0.000000
|
221 |
+
2023-10-11 20:38:46,124 epoch 9 - iter 1424/1786 - loss 0.00944093 - time (sec): 445.24 - samples/sec: 445.21 - lr: 0.000021 - momentum: 0.000000
|
222 |
+
2023-10-11 20:39:40,185 epoch 9 - iter 1602/1786 - loss 0.00949793 - time (sec): 499.30 - samples/sec: 445.57 - lr: 0.000020 - momentum: 0.000000
|
223 |
+
2023-10-11 20:40:35,488 epoch 9 - iter 1780/1786 - loss 0.00978050 - time (sec): 554.61 - samples/sec: 446.96 - lr: 0.000018 - momentum: 0.000000
|
224 |
+
2023-10-11 20:40:37,245 ----------------------------------------------------------------------------------------------------
|
225 |
+
2023-10-11 20:40:37,245 EPOCH 9 done: loss 0.0098 - lr: 0.000018
|
226 |
+
2023-10-11 20:40:59,367 DEV : loss 0.21249088644981384 - f1-score (micro avg) 0.8021
|
227 |
+
2023-10-11 20:40:59,397 ----------------------------------------------------------------------------------------------------
|
228 |
+
2023-10-11 20:41:53,260 epoch 10 - iter 178/1786 - loss 0.00766748 - time (sec): 53.86 - samples/sec: 467.31 - lr: 0.000016 - momentum: 0.000000
|
229 |
+
2023-10-11 20:42:47,621 epoch 10 - iter 356/1786 - loss 0.00666879 - time (sec): 108.22 - samples/sec: 473.72 - lr: 0.000014 - momentum: 0.000000
|
230 |
+
2023-10-11 20:43:40,341 epoch 10 - iter 534/1786 - loss 0.00669677 - time (sec): 160.94 - samples/sec: 463.30 - lr: 0.000012 - momentum: 0.000000
|
231 |
+
2023-10-11 20:44:33,751 epoch 10 - iter 712/1786 - loss 0.00714241 - time (sec): 214.35 - samples/sec: 463.12 - lr: 0.000011 - momentum: 0.000000
|
232 |
+
2023-10-11 20:45:27,178 epoch 10 - iter 890/1786 - loss 0.00681352 - time (sec): 267.78 - samples/sec: 457.07 - lr: 0.000009 - momentum: 0.000000
|
233 |
+
2023-10-11 20:46:22,447 epoch 10 - iter 1068/1786 - loss 0.00770174 - time (sec): 323.05 - samples/sec: 461.61 - lr: 0.000007 - momentum: 0.000000
|
234 |
+
2023-10-11 20:47:15,081 epoch 10 - iter 1246/1786 - loss 0.00775296 - time (sec): 375.68 - samples/sec: 460.13 - lr: 0.000005 - momentum: 0.000000
|
235 |
+
2023-10-11 20:48:10,290 epoch 10 - iter 1424/1786 - loss 0.00785199 - time (sec): 430.89 - samples/sec: 459.46 - lr: 0.000004 - momentum: 0.000000
|
236 |
+
2023-10-11 20:49:06,235 epoch 10 - iter 1602/1786 - loss 0.00742856 - time (sec): 486.84 - samples/sec: 458.91 - lr: 0.000002 - momentum: 0.000000
|
237 |
+
2023-10-11 20:50:01,353 epoch 10 - iter 1780/1786 - loss 0.00753833 - time (sec): 541.95 - samples/sec: 457.97 - lr: 0.000000 - momentum: 0.000000
|
238 |
+
2023-10-11 20:50:02,958 ----------------------------------------------------------------------------------------------------
|
239 |
+
2023-10-11 20:50:02,959 EPOCH 10 done: loss 0.0075 - lr: 0.000000
|
240 |
+
2023-10-11 20:50:25,995 DEV : loss 0.21911536157131195 - f1-score (micro avg) 0.8046
|
241 |
+
2023-10-11 20:50:27,138 ----------------------------------------------------------------------------------------------------
|
242 |
+
2023-10-11 20:50:27,141 Loading model from best epoch ...
|
243 |
+
2023-10-11 20:50:31,856 SequenceTagger predicts: Dictionary with 17 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG, S-HumanProd, B-HumanProd, E-HumanProd, I-HumanProd
|
244 |
+
2023-10-11 20:51:43,012
|
245 |
+
Results:
|
246 |
+
- F-score (micro) 0.6934
|
247 |
+
- F-score (macro) 0.6106
|
248 |
+
- Accuracy 0.5471
|
249 |
+
|
250 |
+
By class:
|
251 |
+
precision recall f1-score support
|
252 |
+
|
253 |
+
LOC 0.7176 0.7242 0.7209 1095
|
254 |
+
PER 0.7850 0.7431 0.7635 1012
|
255 |
+
ORG 0.4150 0.5742 0.4818 357
|
256 |
+
HumanProd 0.3922 0.6061 0.4762 33
|
257 |
+
|
258 |
+
micro avg 0.6787 0.7089 0.6934 2497
|
259 |
+
macro avg 0.5774 0.6619 0.6106 2497
|
260 |
+
weighted avg 0.6974 0.7089 0.7007 2497
|
261 |
+
|
262 |
+
2023-10-11 20:51:43,012 ----------------------------------------------------------------------------------------------------
|