qanastek commited on
Commit
75f7dd8
·
1 Parent(s): 528b2ee

Update model

Browse files
Files changed (6) hide show
  1. dev.tsv +0 -0
  2. loss.tsv +51 -0
  3. pytorch_model.bin +3 -0
  4. test.tsv +0 -0
  5. training.log +1188 -0
  6. weights.txt +0 -0
dev.tsv ADDED
The diff for this file is too large to render. See raw diff
 
loss.tsv ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EPOCH TIMESTAMP BAD_EPOCHS LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
+ 1 08:40:38 0 0.1000 0.4139037357786823 0.09867297857999802 0.9723 0.9723 0.9723 0.9723
3
+ 2 08:43:59 0 0.1000 0.18683743789460058 0.08219591528177261 0.9761 0.9761 0.9761 0.9761
4
+ 3 08:47:19 0 0.1000 0.15330191376146995 0.07821641117334366 0.9771 0.9771 0.9771 0.9771
5
+ 4 08:50:41 0 0.1000 0.13679069024466697 0.07048774510622025 0.9784 0.9784 0.9784 0.9784
6
+ 5 08:54:05 0 0.1000 0.12696925415918978 0.06857253611087799 0.9795 0.9795 0.9795 0.9795
7
+ 6 08:57:27 0 0.1000 0.11950518270160057 0.06588418781757355 0.9805 0.9805 0.9805 0.9805
8
+ 7 09:00:50 0 0.1000 0.11493925645982497 0.06450950354337692 0.981 0.981 0.981 0.981
9
+ 8 09:04:17 1 0.1000 0.10857167749940388 0.06390747427940369 0.9805 0.9805 0.9805 0.9805
10
+ 9 09:07:32 0 0.1000 0.10607049356155288 0.06607701629400253 0.9814 0.9814 0.9814 0.9814
11
+ 10 09:10:55 1 0.1000 0.10357679251794805 0.06536506861448288 0.9811 0.9811 0.9811 0.9811
12
+ 11 09:14:14 2 0.1000 0.09934903912638607 0.06659943610429764 0.9811 0.9811 0.9811 0.9811
13
+ 12 09:17:31 0 0.1000 0.09835840644128474 0.06410104781389236 0.9816 0.9816 0.9816 0.9816
14
+ 13 09:20:54 1 0.1000 0.09667944757963298 0.06427688896656036 0.9816 0.9816 0.9816 0.9816
15
+ 14 09:24:13 0 0.1000 0.09310611423129937 0.06639766693115234 0.9817 0.9817 0.9817 0.9817
16
+ 15 09:27:36 0 0.1000 0.09273757020644302 0.06283392012119293 0.982 0.982 0.982 0.982
17
+ 16 09:30:58 1 0.1000 0.0906242242911817 0.06354553997516632 0.982 0.982 0.982 0.982
18
+ 17 09:34:16 0 0.1000 0.08953603486075622 0.06361010670661926 0.9823 0.9823 0.9823 0.9823
19
+ 18 09:37:38 1 0.1000 0.08716175395396096 0.06376409530639648 0.982 0.982 0.982 0.982
20
+ 19 09:40:54 0 0.1000 0.08630291839682559 0.06360483914613724 0.9824 0.9824 0.9824 0.9824
21
+ 20 09:44:18 1 0.1000 0.08576645481590557 0.06494450569152832 0.982 0.982 0.982 0.982
22
+ 21 09:47:35 0 0.1000 0.08385216420677111 0.06328344345092773 0.9827 0.9827 0.9827 0.9827
23
+ 22 09:50:58 1 0.1000 0.08442341949455835 0.06346500664949417 0.9815 0.9815 0.9815 0.9815
24
+ 23 09:54:16 2 0.1000 0.08142570236260006 0.06540019810199738 0.9821 0.9821 0.9821 0.9821
25
+ 24 09:57:35 3 0.1000 0.0822403573790078 0.06453310698270798 0.9819 0.9819 0.9819 0.9819
26
+ 25 10:00:51 4 0.1000 0.08115838012320148 0.06579063087701797 0.9817 0.9817 0.9817 0.9817
27
+ 26 10:04:08 1 0.0500 0.07444606900847728 0.06646668165922165 0.9822 0.9822 0.9822 0.9822
28
+ 27 10:07:27 2 0.0500 0.0712278272039567 0.06514652073383331 0.9823 0.9823 0.9823 0.9823
29
+ 28 10:10:43 0 0.0500 0.07007554484263678 0.06285692006349564 0.9828 0.9828 0.9828 0.9828
30
+ 29 10:14:05 0 0.0500 0.06775021975879568 0.06288447976112366 0.9831 0.9831 0.9831 0.9831
31
+ 30 10:17:28 1 0.0500 0.06664810656288497 0.06311798095703125 0.9824 0.9824 0.9824 0.9824
32
+ 31 10:20:44 2 0.0500 0.06655944385465427 0.06285466253757477 0.9829 0.9829 0.9829 0.9829
33
+ 32 10:24:00 3 0.0500 0.06484422466931324 0.062373436987400055 0.9827 0.9827 0.9827 0.9827
34
+ 33 10:27:18 4 0.0500 0.0640099294991078 0.06352584064006805 0.983 0.983 0.983 0.983
35
+ 34 10:30:35 0 0.0250 0.060174371477019914 0.06348917633295059 0.9835 0.9835 0.9835 0.9835
36
+ 35 10:33:57 1 0.0250 0.06001775798271323 0.06338120251893997 0.9829 0.9829 0.9829 0.9829
37
+ 36 10:37:17 2 0.0250 0.05871860721139249 0.06424003839492798 0.9835 0.9835 0.9835 0.9835
38
+ 37 10:40:35 3 0.0250 0.058616780999994414 0.06326954811811447 0.9831 0.9831 0.9831 0.9831
39
+ 38 10:43:52 4 0.0250 0.05700768598441678 0.06343492120504379 0.9831 0.9831 0.9831 0.9831
40
+ 39 10:47:11 1 0.0125 0.05521906535375693 0.06419230252504349 0.9829 0.9829 0.9829 0.9829
41
+ 40 10:50:26 2 0.0125 0.0545923078244546 0.06343018263578415 0.9829 0.9829 0.9829 0.9829
42
+ 41 10:53:43 3 0.0125 0.05370077676597374 0.06420625746250153 0.9831 0.9831 0.9831 0.9831
43
+ 42 10:57:03 4 0.0125 0.053218689249659105 0.06362675130367279 0.9831 0.9831 0.9831 0.9831
44
+ 43 11:00:20 1 0.0063 0.052319793331815405 0.06449297815561295 0.983 0.983 0.983 0.983
45
+ 44 11:03:36 2 0.0063 0.052714465782813934 0.06455685943365097 0.9831 0.9831 0.9831 0.9831
46
+ 45 11:06:56 3 0.0063 0.052344465870703995 0.06413871794939041 0.983 0.983 0.983 0.983
47
+ 46 11:10:13 4 0.0063 0.05212448329383334 0.0644669309258461 0.983 0.983 0.983 0.983
48
+ 47 11:13:29 1 0.0031 0.05113414183996207 0.06470787525177002 0.9829 0.9829 0.9829 0.9829
49
+ 48 11:16:48 2 0.0031 0.05100923420506556 0.06484530121088028 0.983 0.983 0.983 0.983
50
+ 49 11:20:04 3 0.0031 0.05102771810317176 0.06486314535140991 0.983 0.983 0.983 0.983
51
+ 50 11:23:20 4 0.0031 0.05190557099563212 0.06452730298042297 0.9831 0.9831 0.9831 0.9831
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81250a3c3c5e6639a92f0a5c782ca1533c886a4ae9d48dccf7fffb85c8c27794
3
+ size 539091093
test.tsv ADDED
The diff for this file is too large to render. See raw diff
 
training.log ADDED
@@ -0,0 +1,1188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-12-31 08:35:07,676 ----------------------------------------------------------------------------------------------------
2
+ 2021-12-31 08:35:07,680 Model: "SequenceTagger(
3
+ (embeddings): StackedEmbeddings(
4
+ (list_embedding_0): FlairEmbeddings(
5
+ (lm): LanguageModel(
6
+ (drop): Dropout(p=0.5, inplace=False)
7
+ (encoder): Embedding(275, 100)
8
+ (rnn): LSTM(100, 1024)
9
+ (decoder): Linear(in_features=1024, out_features=275, bias=True)
10
+ )
11
+ )
12
+ (list_embedding_1): FlairEmbeddings(
13
+ (lm): LanguageModel(
14
+ (drop): Dropout(p=0.5, inplace=False)
15
+ (encoder): Embedding(275, 100)
16
+ (rnn): LSTM(100, 1024)
17
+ (decoder): Linear(in_features=1024, out_features=275, bias=True)
18
+ )
19
+ )
20
+ (list_embedding_2): TransformerWordEmbeddings(
21
+ (model): CamembertModel(
22
+ (embeddings): RobertaEmbeddings(
23
+ (word_embeddings): Embedding(32005, 768, padding_idx=1)
24
+ (position_embeddings): Embedding(514, 768, padding_idx=1)
25
+ (token_type_embeddings): Embedding(1, 768)
26
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
27
+ (dropout): Dropout(p=0.1, inplace=False)
28
+ )
29
+ (encoder): RobertaEncoder(
30
+ (layer): ModuleList(
31
+ (0): RobertaLayer(
32
+ (attention): RobertaAttention(
33
+ (self): RobertaSelfAttention(
34
+ (query): Linear(in_features=768, out_features=768, bias=True)
35
+ (key): Linear(in_features=768, out_features=768, bias=True)
36
+ (value): Linear(in_features=768, out_features=768, bias=True)
37
+ (dropout): Dropout(p=0.1, inplace=False)
38
+ )
39
+ (output): RobertaSelfOutput(
40
+ (dense): Linear(in_features=768, out_features=768, bias=True)
41
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
42
+ (dropout): Dropout(p=0.1, inplace=False)
43
+ )
44
+ )
45
+ (intermediate): RobertaIntermediate(
46
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
47
+ )
48
+ (output): RobertaOutput(
49
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
50
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
51
+ (dropout): Dropout(p=0.1, inplace=False)
52
+ )
53
+ )
54
+ (1): RobertaLayer(
55
+ (attention): RobertaAttention(
56
+ (self): RobertaSelfAttention(
57
+ (query): Linear(in_features=768, out_features=768, bias=True)
58
+ (key): Linear(in_features=768, out_features=768, bias=True)
59
+ (value): Linear(in_features=768, out_features=768, bias=True)
60
+ (dropout): Dropout(p=0.1, inplace=False)
61
+ )
62
+ (output): RobertaSelfOutput(
63
+ (dense): Linear(in_features=768, out_features=768, bias=True)
64
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
65
+ (dropout): Dropout(p=0.1, inplace=False)
66
+ )
67
+ )
68
+ (intermediate): RobertaIntermediate(
69
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
70
+ )
71
+ (output): RobertaOutput(
72
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
73
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
74
+ (dropout): Dropout(p=0.1, inplace=False)
75
+ )
76
+ )
77
+ (2): RobertaLayer(
78
+ (attention): RobertaAttention(
79
+ (self): RobertaSelfAttention(
80
+ (query): Linear(in_features=768, out_features=768, bias=True)
81
+ (key): Linear(in_features=768, out_features=768, bias=True)
82
+ (value): Linear(in_features=768, out_features=768, bias=True)
83
+ (dropout): Dropout(p=0.1, inplace=False)
84
+ )
85
+ (output): RobertaSelfOutput(
86
+ (dense): Linear(in_features=768, out_features=768, bias=True)
87
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
88
+ (dropout): Dropout(p=0.1, inplace=False)
89
+ )
90
+ )
91
+ (intermediate): RobertaIntermediate(
92
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
93
+ )
94
+ (output): RobertaOutput(
95
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
96
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
97
+ (dropout): Dropout(p=0.1, inplace=False)
98
+ )
99
+ )
100
+ (3): RobertaLayer(
101
+ (attention): RobertaAttention(
102
+ (self): RobertaSelfAttention(
103
+ (query): Linear(in_features=768, out_features=768, bias=True)
104
+ (key): Linear(in_features=768, out_features=768, bias=True)
105
+ (value): Linear(in_features=768, out_features=768, bias=True)
106
+ (dropout): Dropout(p=0.1, inplace=False)
107
+ )
108
+ (output): RobertaSelfOutput(
109
+ (dense): Linear(in_features=768, out_features=768, bias=True)
110
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
111
+ (dropout): Dropout(p=0.1, inplace=False)
112
+ )
113
+ )
114
+ (intermediate): RobertaIntermediate(
115
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
116
+ )
117
+ (output): RobertaOutput(
118
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
119
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
120
+ (dropout): Dropout(p=0.1, inplace=False)
121
+ )
122
+ )
123
+ (4): RobertaLayer(
124
+ (attention): RobertaAttention(
125
+ (self): RobertaSelfAttention(
126
+ (query): Linear(in_features=768, out_features=768, bias=True)
127
+ (key): Linear(in_features=768, out_features=768, bias=True)
128
+ (value): Linear(in_features=768, out_features=768, bias=True)
129
+ (dropout): Dropout(p=0.1, inplace=False)
130
+ )
131
+ (output): RobertaSelfOutput(
132
+ (dense): Linear(in_features=768, out_features=768, bias=True)
133
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
134
+ (dropout): Dropout(p=0.1, inplace=False)
135
+ )
136
+ )
137
+ (intermediate): RobertaIntermediate(
138
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
139
+ )
140
+ (output): RobertaOutput(
141
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
142
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
143
+ (dropout): Dropout(p=0.1, inplace=False)
144
+ )
145
+ )
146
+ (5): RobertaLayer(
147
+ (attention): RobertaAttention(
148
+ (self): RobertaSelfAttention(
149
+ (query): Linear(in_features=768, out_features=768, bias=True)
150
+ (key): Linear(in_features=768, out_features=768, bias=True)
151
+ (value): Linear(in_features=768, out_features=768, bias=True)
152
+ (dropout): Dropout(p=0.1, inplace=False)
153
+ )
154
+ (output): RobertaSelfOutput(
155
+ (dense): Linear(in_features=768, out_features=768, bias=True)
156
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
157
+ (dropout): Dropout(p=0.1, inplace=False)
158
+ )
159
+ )
160
+ (intermediate): RobertaIntermediate(
161
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
162
+ )
163
+ (output): RobertaOutput(
164
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
165
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
166
+ (dropout): Dropout(p=0.1, inplace=False)
167
+ )
168
+ )
169
+ (6): RobertaLayer(
170
+ (attention): RobertaAttention(
171
+ (self): RobertaSelfAttention(
172
+ (query): Linear(in_features=768, out_features=768, bias=True)
173
+ (key): Linear(in_features=768, out_features=768, bias=True)
174
+ (value): Linear(in_features=768, out_features=768, bias=True)
175
+ (dropout): Dropout(p=0.1, inplace=False)
176
+ )
177
+ (output): RobertaSelfOutput(
178
+ (dense): Linear(in_features=768, out_features=768, bias=True)
179
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
180
+ (dropout): Dropout(p=0.1, inplace=False)
181
+ )
182
+ )
183
+ (intermediate): RobertaIntermediate(
184
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
185
+ )
186
+ (output): RobertaOutput(
187
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
188
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
189
+ (dropout): Dropout(p=0.1, inplace=False)
190
+ )
191
+ )
192
+ (7): RobertaLayer(
193
+ (attention): RobertaAttention(
194
+ (self): RobertaSelfAttention(
195
+ (query): Linear(in_features=768, out_features=768, bias=True)
196
+ (key): Linear(in_features=768, out_features=768, bias=True)
197
+ (value): Linear(in_features=768, out_features=768, bias=True)
198
+ (dropout): Dropout(p=0.1, inplace=False)
199
+ )
200
+ (output): RobertaSelfOutput(
201
+ (dense): Linear(in_features=768, out_features=768, bias=True)
202
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
203
+ (dropout): Dropout(p=0.1, inplace=False)
204
+ )
205
+ )
206
+ (intermediate): RobertaIntermediate(
207
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
208
+ )
209
+ (output): RobertaOutput(
210
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
211
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
212
+ (dropout): Dropout(p=0.1, inplace=False)
213
+ )
214
+ )
215
+ (8): RobertaLayer(
216
+ (attention): RobertaAttention(
217
+ (self): RobertaSelfAttention(
218
+ (query): Linear(in_features=768, out_features=768, bias=True)
219
+ (key): Linear(in_features=768, out_features=768, bias=True)
220
+ (value): Linear(in_features=768, out_features=768, bias=True)
221
+ (dropout): Dropout(p=0.1, inplace=False)
222
+ )
223
+ (output): RobertaSelfOutput(
224
+ (dense): Linear(in_features=768, out_features=768, bias=True)
225
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
226
+ (dropout): Dropout(p=0.1, inplace=False)
227
+ )
228
+ )
229
+ (intermediate): RobertaIntermediate(
230
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
231
+ )
232
+ (output): RobertaOutput(
233
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
234
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
235
+ (dropout): Dropout(p=0.1, inplace=False)
236
+ )
237
+ )
238
+ (9): RobertaLayer(
239
+ (attention): RobertaAttention(
240
+ (self): RobertaSelfAttention(
241
+ (query): Linear(in_features=768, out_features=768, bias=True)
242
+ (key): Linear(in_features=768, out_features=768, bias=True)
243
+ (value): Linear(in_features=768, out_features=768, bias=True)
244
+ (dropout): Dropout(p=0.1, inplace=False)
245
+ )
246
+ (output): RobertaSelfOutput(
247
+ (dense): Linear(in_features=768, out_features=768, bias=True)
248
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
249
+ (dropout): Dropout(p=0.1, inplace=False)
250
+ )
251
+ )
252
+ (intermediate): RobertaIntermediate(
253
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
254
+ )
255
+ (output): RobertaOutput(
256
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
257
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
258
+ (dropout): Dropout(p=0.1, inplace=False)
259
+ )
260
+ )
261
+ (10): RobertaLayer(
262
+ (attention): RobertaAttention(
263
+ (self): RobertaSelfAttention(
264
+ (query): Linear(in_features=768, out_features=768, bias=True)
265
+ (key): Linear(in_features=768, out_features=768, bias=True)
266
+ (value): Linear(in_features=768, out_features=768, bias=True)
267
+ (dropout): Dropout(p=0.1, inplace=False)
268
+ )
269
+ (output): RobertaSelfOutput(
270
+ (dense): Linear(in_features=768, out_features=768, bias=True)
271
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
272
+ (dropout): Dropout(p=0.1, inplace=False)
273
+ )
274
+ )
275
+ (intermediate): RobertaIntermediate(
276
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
277
+ )
278
+ (output): RobertaOutput(
279
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
280
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
281
+ (dropout): Dropout(p=0.1, inplace=False)
282
+ )
283
+ )
284
+ (11): RobertaLayer(
285
+ (attention): RobertaAttention(
286
+ (self): RobertaSelfAttention(
287
+ (query): Linear(in_features=768, out_features=768, bias=True)
288
+ (key): Linear(in_features=768, out_features=768, bias=True)
289
+ (value): Linear(in_features=768, out_features=768, bias=True)
290
+ (dropout): Dropout(p=0.1, inplace=False)
291
+ )
292
+ (output): RobertaSelfOutput(
293
+ (dense): Linear(in_features=768, out_features=768, bias=True)
294
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
295
+ (dropout): Dropout(p=0.1, inplace=False)
296
+ )
297
+ )
298
+ (intermediate): RobertaIntermediate(
299
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
300
+ )
301
+ (output): RobertaOutput(
302
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
303
+ (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
304
+ (dropout): Dropout(p=0.1, inplace=False)
305
+ )
306
+ )
307
+ )
308
+ )
309
+ (pooler): RobertaPooler(
310
+ (dense): Linear(in_features=768, out_features=768, bias=True)
311
+ (activation): Tanh()
312
+ )
313
+ )
314
+ )
315
+ )
316
+ (word_dropout): WordDropout(p=0.05)
317
+ (locked_dropout): LockedDropout(p=0.5)
318
+ (embedding2nn): Linear(in_features=2816, out_features=2816, bias=True)
319
+ (rnn): LSTM(2816, 256, batch_first=True, bidirectional=True)
320
+ (linear): Linear(in_features=512, out_features=68, bias=True)
321
+ (beta): 1.0
322
+ (weights): None
323
+ (weight_tensor) None
324
+ )"
325
+ 2021-12-31 08:35:07,680 ----------------------------------------------------------------------------------------------------
326
+ 2021-12-31 08:35:07,681 Corpus: "Corpus: 14449 train + 1476 dev + 416 test sentences"
327
+ 2021-12-31 08:35:07,681 ----------------------------------------------------------------------------------------------------
328
+ 2021-12-31 08:35:07,681 Parameters:
329
+ 2021-12-31 08:35:07,681 - learning_rate: "0.1"
330
+ 2021-12-31 08:35:07,681 - mini_batch_size: "8"
331
+ 2021-12-31 08:35:07,681 - patience: "3"
332
+ 2021-12-31 08:35:07,681 - anneal_factor: "0.5"
333
+ 2021-12-31 08:35:07,681 - max_epochs: "50"
334
+ 2021-12-31 08:35:07,681 - shuffle: "True"
335
+ 2021-12-31 08:35:07,681 - train_with_dev: "False"
336
+ 2021-12-31 08:35:07,681 - batch_growth_annealing: "False"
337
+ 2021-12-31 08:35:07,681 ----------------------------------------------------------------------------------------------------
338
+ 2021-12-31 08:35:07,681 Model training base path: "models/UPOS_UD_FRENCH_GSD_PLUS_Flair-Embeddings_50_2021-12-31-08:34:44"
339
+ 2021-12-31 08:35:07,681 ----------------------------------------------------------------------------------------------------
340
+ 2021-12-31 08:35:07,682 Device: cuda:0
341
+ 2021-12-31 08:35:07,682 ----------------------------------------------------------------------------------------------------
342
+ 2021-12-31 08:35:07,682 Embeddings storage mode: cpu
343
+ 2021-12-31 08:35:07,686 ----------------------------------------------------------------------------------------------------
344
+ 2021-12-31 08:35:35,600 epoch 1 - iter 180/1807 - loss 1.43338722 - samples/sec: 51.63 - lr: 0.100000
345
+ 2021-12-31 08:36:03,642 epoch 1 - iter 360/1807 - loss 0.97278560 - samples/sec: 51.39 - lr: 0.100000
346
+ 2021-12-31 08:36:31,448 epoch 1 - iter 540/1807 - loss 0.77628898 - samples/sec: 51.83 - lr: 0.100000
347
+ 2021-12-31 08:37:00,007 epoch 1 - iter 720/1807 - loss 0.66122431 - samples/sec: 50.46 - lr: 0.100000
348
+ 2021-12-31 08:37:29,449 epoch 1 - iter 900/1807 - loss 0.58637716 - samples/sec: 48.94 - lr: 0.100000
349
+ 2021-12-31 08:37:57,842 epoch 1 - iter 1080/1807 - loss 0.53261867 - samples/sec: 50.75 - lr: 0.100000
350
+ 2021-12-31 08:38:27,836 epoch 1 - iter 1260/1807 - loss 0.49236809 - samples/sec: 48.04 - lr: 0.100000
351
+ 2021-12-31 08:38:56,177 epoch 1 - iter 1440/1807 - loss 0.46224064 - samples/sec: 50.84 - lr: 0.100000
352
+ 2021-12-31 08:39:25,301 epoch 1 - iter 1620/1807 - loss 0.43700232 - samples/sec: 49.48 - lr: 0.100000
353
+ 2021-12-31 08:39:53,843 epoch 1 - iter 1800/1807 - loss 0.41459922 - samples/sec: 50.49 - lr: 0.100000
354
+ 2021-12-31 08:39:54,850 ----------------------------------------------------------------------------------------------------
355
+ 2021-12-31 08:39:54,851 EPOCH 1 done: loss 0.4139 - lr 0.1000000
356
+ 2021-12-31 08:40:38,186 DEV : loss 0.09867297857999802 - f1-score (micro avg) 0.9723
357
+ 2021-12-31 08:40:38,373 BAD EPOCHS (no improvement): 0
358
+ 2021-12-31 08:40:38,375 saving best model
359
+ 2021-12-31 08:40:43,945 ----------------------------------------------------------------------------------------------------
360
+ 2021-12-31 08:40:59,809 epoch 2 - iter 180/1807 - loss 0.20282785 - samples/sec: 90.92 - lr: 0.100000
361
+ 2021-12-31 08:41:15,798 epoch 2 - iter 360/1807 - loss 0.20600484 - samples/sec: 90.20 - lr: 0.100000
362
+ 2021-12-31 08:41:31,824 epoch 2 - iter 540/1807 - loss 0.20352355 - samples/sec: 89.99 - lr: 0.100000
363
+ 2021-12-31 08:41:47,291 epoch 2 - iter 720/1807 - loss 0.19945298 - samples/sec: 93.24 - lr: 0.100000
364
+ 2021-12-31 08:42:03,389 epoch 2 - iter 900/1807 - loss 0.19672769 - samples/sec: 89.58 - lr: 0.100000
365
+ 2021-12-31 08:42:19,546 epoch 2 - iter 1080/1807 - loss 0.19404584 - samples/sec: 89.25 - lr: 0.100000
366
+ 2021-12-31 08:42:35,186 epoch 2 - iter 1260/1807 - loss 0.19211776 - samples/sec: 92.22 - lr: 0.100000
367
+ 2021-12-31 08:42:51,014 epoch 2 - iter 1440/1807 - loss 0.19040930 - samples/sec: 91.11 - lr: 0.100000
368
+ 2021-12-31 08:43:07,108 epoch 2 - iter 1620/1807 - loss 0.18835936 - samples/sec: 89.60 - lr: 0.100000
369
+ 2021-12-31 08:43:22,664 epoch 2 - iter 1800/1807 - loss 0.18684498 - samples/sec: 92.71 - lr: 0.100000
370
+ 2021-12-31 08:43:23,166 ----------------------------------------------------------------------------------------------------
371
+ 2021-12-31 08:43:23,166 EPOCH 2 done: loss 0.1868 - lr 0.1000000
372
+ 2021-12-31 08:43:59,411 DEV : loss 0.08219591528177261 - f1-score (micro avg) 0.9761
373
+ 2021-12-31 08:43:59,601 BAD EPOCHS (no improvement): 0
374
+ 2021-12-31 08:43:59,602 saving best model
375
+ 2021-12-31 08:44:04,994 ----------------------------------------------------------------------------------------------------
376
+ 2021-12-31 08:44:21,188 epoch 3 - iter 180/1807 - loss 0.16248988 - samples/sec: 89.06 - lr: 0.100000
377
+ 2021-12-31 08:44:37,143 epoch 3 - iter 360/1807 - loss 0.16012805 - samples/sec: 90.38 - lr: 0.100000
378
+ 2021-12-31 08:44:53,240 epoch 3 - iter 540/1807 - loss 0.15771573 - samples/sec: 89.59 - lr: 0.100000
379
+ 2021-12-31 08:45:08,820 epoch 3 - iter 720/1807 - loss 0.15678918 - samples/sec: 92.57 - lr: 0.100000
380
+ 2021-12-31 08:45:24,447 epoch 3 - iter 900/1807 - loss 0.15583330 - samples/sec: 92.28 - lr: 0.100000
381
+ 2021-12-31 08:45:40,453 epoch 3 - iter 1080/1807 - loss 0.15551694 - samples/sec: 90.10 - lr: 0.100000
382
+ 2021-12-31 08:45:56,421 epoch 3 - iter 1260/1807 - loss 0.15503272 - samples/sec: 90.32 - lr: 0.100000
383
+ 2021-12-31 08:46:12,207 epoch 3 - iter 1440/1807 - loss 0.15478837 - samples/sec: 91.35 - lr: 0.100000
384
+ 2021-12-31 08:46:28,067 epoch 3 - iter 1620/1807 - loss 0.15437671 - samples/sec: 90.93 - lr: 0.100000
385
+ 2021-12-31 08:46:44,096 epoch 3 - iter 1800/1807 - loss 0.15334210 - samples/sec: 89.96 - lr: 0.100000
386
+ 2021-12-31 08:46:44,638 ----------------------------------------------------------------------------------------------------
387
+ 2021-12-31 08:46:44,638 EPOCH 3 done: loss 0.1533 - lr 0.1000000
388
+ 2021-12-31 08:47:19,364 DEV : loss 0.07821641117334366 - f1-score (micro avg) 0.9771
389
+ 2021-12-31 08:47:19,574 BAD EPOCHS (no improvement): 0
390
+ 2021-12-31 08:47:19,576 saving best model
391
+ 2021-12-31 08:47:25,807 ----------------------------------------------------------------------------------------------------
392
+ 2021-12-31 08:47:42,295 epoch 4 - iter 180/1807 - loss 0.14078583 - samples/sec: 87.48 - lr: 0.100000
393
+ 2021-12-31 08:47:58,394 epoch 4 - iter 360/1807 - loss 0.14084079 - samples/sec: 89.58 - lr: 0.100000
394
+ 2021-12-31 08:48:14,377 epoch 4 - iter 540/1807 - loss 0.13969043 - samples/sec: 90.22 - lr: 0.100000
395
+ 2021-12-31 08:48:30,411 epoch 4 - iter 720/1807 - loss 0.13901425 - samples/sec: 89.95 - lr: 0.100000
396
+ 2021-12-31 08:48:45,985 epoch 4 - iter 900/1807 - loss 0.13965987 - samples/sec: 92.60 - lr: 0.100000
397
+ 2021-12-31 08:49:01,706 epoch 4 - iter 1080/1807 - loss 0.13942263 - samples/sec: 91.73 - lr: 0.100000
398
+ 2021-12-31 08:49:17,833 epoch 4 - iter 1260/1807 - loss 0.13931213 - samples/sec: 89.42 - lr: 0.100000
399
+ 2021-12-31 08:49:33,693 epoch 4 - iter 1440/1807 - loss 0.13835426 - samples/sec: 90.94 - lr: 0.100000
400
+ 2021-12-31 08:49:49,444 epoch 4 - iter 1620/1807 - loss 0.13722078 - samples/sec: 91.56 - lr: 0.100000
401
+ 2021-12-31 08:50:05,233 epoch 4 - iter 1800/1807 - loss 0.13680325 - samples/sec: 91.33 - lr: 0.100000
402
+ 2021-12-31 08:50:05,825 ----------------------------------------------------------------------------------------------------
403
+ 2021-12-31 08:50:05,826 EPOCH 4 done: loss 0.1368 - lr 0.1000000
404
+ 2021-12-31 08:50:40,951 DEV : loss 0.07048774510622025 - f1-score (micro avg) 0.9784
405
+ 2021-12-31 08:50:41,121 BAD EPOCHS (no improvement): 0
406
+ 2021-12-31 08:50:41,123 saving best model
407
+ 2021-12-31 08:50:46,985 ----------------------------------------------------------------------------------------------------
408
+ 2021-12-31 08:51:03,480 epoch 5 - iter 180/1807 - loss 0.12576483 - samples/sec: 87.44 - lr: 0.100000
409
+ 2021-12-31 08:51:19,312 epoch 5 - iter 360/1807 - loss 0.12838224 - samples/sec: 91.10 - lr: 0.100000
410
+ 2021-12-31 08:51:35,140 epoch 5 - iter 540/1807 - loss 0.13027925 - samples/sec: 91.11 - lr: 0.100000
411
+ 2021-12-31 08:51:51,382 epoch 5 - iter 720/1807 - loss 0.13001079 - samples/sec: 88.78 - lr: 0.100000
412
+ 2021-12-31 08:52:07,009 epoch 5 - iter 900/1807 - loss 0.12990639 - samples/sec: 92.28 - lr: 0.100000
413
+ 2021-12-31 08:52:22,749 epoch 5 - iter 1080/1807 - loss 0.12927608 - samples/sec: 91.63 - lr: 0.100000
414
+ 2021-12-31 08:52:38,459 epoch 5 - iter 1260/1807 - loss 0.12839810 - samples/sec: 91.79 - lr: 0.100000
415
+ 2021-12-31 08:52:54,183 epoch 5 - iter 1440/1807 - loss 0.12750076 - samples/sec: 91.71 - lr: 0.100000
416
+ 2021-12-31 08:53:09,782 epoch 5 - iter 1620/1807 - loss 0.12744081 - samples/sec: 92.45 - lr: 0.100000
417
+ 2021-12-31 08:53:26,181 epoch 5 - iter 1800/1807 - loss 0.12697954 - samples/sec: 87.94 - lr: 0.100000
418
+ 2021-12-31 08:53:26,718 ----------------------------------------------------------------------------------------------------
419
+ 2021-12-31 08:53:26,718 EPOCH 5 done: loss 0.1270 - lr 0.1000000
420
+ 2021-12-31 08:54:05,303 DEV : loss 0.06857253611087799 - f1-score (micro avg) 0.9795
421
+ 2021-12-31 08:54:05,490 BAD EPOCHS (no improvement): 0
422
+ 2021-12-31 08:54:05,491 saving best model
423
+ 2021-12-31 08:54:11,317 ----------------------------------------------------------------------------------------------------
424
+ 2021-12-31 08:54:27,729 epoch 6 - iter 180/1807 - loss 0.12012197 - samples/sec: 87.88 - lr: 0.100000
425
+ 2021-12-31 08:54:43,570 epoch 6 - iter 360/1807 - loss 0.12134345 - samples/sec: 91.04 - lr: 0.100000
426
+ 2021-12-31 08:54:59,298 epoch 6 - iter 540/1807 - loss 0.12010472 - samples/sec: 91.70 - lr: 0.100000
427
+ 2021-12-31 08:55:14,710 epoch 6 - iter 720/1807 - loss 0.11985671 - samples/sec: 93.58 - lr: 0.100000
428
+ 2021-12-31 08:55:30,873 epoch 6 - iter 900/1807 - loss 0.12032070 - samples/sec: 89.22 - lr: 0.100000
429
+ 2021-12-31 08:55:46,705 epoch 6 - iter 1080/1807 - loss 0.11976455 - samples/sec: 91.08 - lr: 0.100000
430
+ 2021-12-31 08:56:02,915 epoch 6 - iter 1260/1807 - loss 0.11964832 - samples/sec: 88.97 - lr: 0.100000
431
+ 2021-12-31 08:56:18,616 epoch 6 - iter 1440/1807 - loss 0.11958148 - samples/sec: 91.86 - lr: 0.100000
432
+ 2021-12-31 08:56:34,478 epoch 6 - iter 1620/1807 - loss 0.12003314 - samples/sec: 90.91 - lr: 0.100000
433
+ 2021-12-31 08:56:50,548 epoch 6 - iter 1800/1807 - loss 0.11950787 - samples/sec: 89.75 - lr: 0.100000
434
+ 2021-12-31 08:56:51,070 ----------------------------------------------------------------------------------------------------
435
+ 2021-12-31 08:56:51,070 EPOCH 6 done: loss 0.1195 - lr 0.1000000
436
+ 2021-12-31 08:57:26,881 DEV : loss 0.06588418781757355 - f1-score (micro avg) 0.9805
437
+ 2021-12-31 08:57:27,077 BAD EPOCHS (no improvement): 0
438
+ 2021-12-31 08:57:27,079 saving best model
439
+ 2021-12-31 08:57:32,878 ----------------------------------------------------------------------------------------------------
440
+ 2021-12-31 08:57:49,222 epoch 7 - iter 180/1807 - loss 0.11622596 - samples/sec: 88.27 - lr: 0.100000
441
+ 2021-12-31 08:58:05,154 epoch 7 - iter 360/1807 - loss 0.11182908 - samples/sec: 90.52 - lr: 0.100000
442
+ 2021-12-31 08:58:21,316 epoch 7 - iter 540/1807 - loss 0.11325284 - samples/sec: 89.23 - lr: 0.100000
443
+ 2021-12-31 08:58:37,501 epoch 7 - iter 720/1807 - loss 0.11356510 - samples/sec: 89.11 - lr: 0.100000
444
+ 2021-12-31 08:58:53,437 epoch 7 - iter 900/1807 - loss 0.11375009 - samples/sec: 90.50 - lr: 0.100000
445
+ 2021-12-31 08:59:09,683 epoch 7 - iter 1080/1807 - loss 0.11424006 - samples/sec: 88.76 - lr: 0.100000
446
+ 2021-12-31 08:59:25,513 epoch 7 - iter 1260/1807 - loss 0.11502991 - samples/sec: 91.10 - lr: 0.100000
447
+ 2021-12-31 08:59:41,355 epoch 7 - iter 1440/1807 - loss 0.11465724 - samples/sec: 91.04 - lr: 0.100000
448
+ 2021-12-31 08:59:57,048 epoch 7 - iter 1620/1807 - loss 0.11489345 - samples/sec: 91.91 - lr: 0.100000
449
+ 2021-12-31 09:00:13,626 epoch 7 - iter 1800/1807 - loss 0.11495780 - samples/sec: 86.99 - lr: 0.100000
450
+ 2021-12-31 09:00:14,225 ----------------------------------------------------------------------------------------------------
451
+ 2021-12-31 09:00:14,225 EPOCH 7 done: loss 0.1149 - lr 0.1000000
452
+ 2021-12-31 09:00:50,356 DEV : loss 0.06450950354337692 - f1-score (micro avg) 0.981
453
+ 2021-12-31 09:00:50,566 BAD EPOCHS (no improvement): 0
454
+ 2021-12-31 09:00:50,572 saving best model
455
+ 2021-12-31 09:00:56,353 ----------------------------------------------------------------------------------------------------
456
+ 2021-12-31 09:01:12,703 epoch 8 - iter 180/1807 - loss 0.10372694 - samples/sec: 88.23 - lr: 0.100000
457
+ 2021-12-31 09:01:28,785 epoch 8 - iter 360/1807 - loss 0.10507104 - samples/sec: 89.68 - lr: 0.100000
458
+ 2021-12-31 09:01:45,134 epoch 8 - iter 540/1807 - loss 0.10666062 - samples/sec: 88.21 - lr: 0.100000
459
+ 2021-12-31 09:02:01,507 epoch 8 - iter 720/1807 - loss 0.10750728 - samples/sec: 88.08 - lr: 0.100000
460
+ 2021-12-31 09:02:17,626 epoch 8 - iter 900/1807 - loss 0.10760637 - samples/sec: 89.47 - lr: 0.100000
461
+ 2021-12-31 09:02:33,374 epoch 8 - iter 1080/1807 - loss 0.10788257 - samples/sec: 91.58 - lr: 0.100000
462
+ 2021-12-31 09:02:49,200 epoch 8 - iter 1260/1807 - loss 0.10808589 - samples/sec: 91.12 - lr: 0.100000
463
+ 2021-12-31 09:03:05,738 epoch 8 - iter 1440/1807 - loss 0.10815170 - samples/sec: 87.20 - lr: 0.100000
464
+ 2021-12-31 09:03:21,442 epoch 8 - iter 1620/1807 - loss 0.10840840 - samples/sec: 91.84 - lr: 0.100000
465
+ 2021-12-31 09:03:37,709 epoch 8 - iter 1800/1807 - loss 0.10855634 - samples/sec: 88.66 - lr: 0.100000
466
+ 2021-12-31 09:03:38,280 ----------------------------------------------------------------------------------------------------
467
+ 2021-12-31 09:03:38,280 EPOCH 8 done: loss 0.1086 - lr 0.1000000
468
+ 2021-12-31 09:04:17,043 DEV : loss 0.06390747427940369 - f1-score (micro avg) 0.9805
469
+ 2021-12-31 09:04:17,194 BAD EPOCHS (no improvement): 1
470
+ 2021-12-31 09:04:17,196 ----------------------------------------------------------------------------------------------------
471
+ 2021-12-31 09:04:33,331 epoch 9 - iter 180/1807 - loss 0.10260778 - samples/sec: 89.39 - lr: 0.100000
472
+ 2021-12-31 09:04:49,336 epoch 9 - iter 360/1807 - loss 0.10566575 - samples/sec: 90.11 - lr: 0.100000
473
+ 2021-12-31 09:05:05,083 epoch 9 - iter 540/1807 - loss 0.10556216 - samples/sec: 91.59 - lr: 0.100000
474
+ 2021-12-31 09:05:21,004 epoch 9 - iter 720/1807 - loss 0.10506801 - samples/sec: 90.58 - lr: 0.100000
475
+ 2021-12-31 09:05:37,109 epoch 9 - iter 900/1807 - loss 0.10596338 - samples/sec: 89.54 - lr: 0.100000
476
+ 2021-12-31 09:05:52,784 epoch 9 - iter 1080/1807 - loss 0.10577668 - samples/sec: 92.02 - lr: 0.100000
477
+ 2021-12-31 09:06:08,937 epoch 9 - iter 1260/1807 - loss 0.10613509 - samples/sec: 89.28 - lr: 0.100000
478
+ 2021-12-31 09:06:24,601 epoch 9 - iter 1440/1807 - loss 0.10637150 - samples/sec: 92.06 - lr: 0.100000
479
+ 2021-12-31 09:06:40,409 epoch 9 - iter 1620/1807 - loss 0.10629708 - samples/sec: 91.23 - lr: 0.100000
480
+ 2021-12-31 09:06:55,972 epoch 9 - iter 1800/1807 - loss 0.10610710 - samples/sec: 92.67 - lr: 0.100000
481
+ 2021-12-31 09:06:56,557 ----------------------------------------------------------------------------------------------------
482
+ 2021-12-31 09:06:56,557 EPOCH 9 done: loss 0.1061 - lr 0.1000000
483
+ 2021-12-31 09:07:32,784 DEV : loss 0.06607701629400253 - f1-score (micro avg) 0.9814
484
+ 2021-12-31 09:07:32,970 BAD EPOCHS (no improvement): 0
485
+ 2021-12-31 09:07:32,972 saving best model
486
+ 2021-12-31 09:07:38,755 ----------------------------------------------------------------------------------------------------
487
+ 2021-12-31 09:07:55,004 epoch 10 - iter 180/1807 - loss 0.10366226 - samples/sec: 88.76 - lr: 0.100000
488
+ 2021-12-31 09:08:11,104 epoch 10 - iter 360/1807 - loss 0.10828055 - samples/sec: 89.58 - lr: 0.100000
489
+ 2021-12-31 09:08:26,748 epoch 10 - iter 540/1807 - loss 0.10589800 - samples/sec: 92.20 - lr: 0.100000
490
+ 2021-12-31 09:08:42,772 epoch 10 - iter 720/1807 - loss 0.10467961 - samples/sec: 90.00 - lr: 0.100000
491
+ 2021-12-31 09:08:58,992 epoch 10 - iter 900/1807 - loss 0.10355149 - samples/sec: 88.91 - lr: 0.100000
492
+ 2021-12-31 09:09:14,753 epoch 10 - iter 1080/1807 - loss 0.10313717 - samples/sec: 91.50 - lr: 0.100000
493
+ 2021-12-31 09:09:30,631 epoch 10 - iter 1260/1807 - loss 0.10353533 - samples/sec: 90.84 - lr: 0.100000
494
+ 2021-12-31 09:09:46,654 epoch 10 - iter 1440/1807 - loss 0.10386166 - samples/sec: 90.02 - lr: 0.100000
495
+ 2021-12-31 09:10:02,791 epoch 10 - iter 1620/1807 - loss 0.10346798 - samples/sec: 89.36 - lr: 0.100000
496
+ 2021-12-31 09:10:18,970 epoch 10 - iter 1800/1807 - loss 0.10358051 - samples/sec: 89.14 - lr: 0.100000
497
+ 2021-12-31 09:10:19,492 ----------------------------------------------------------------------------------------------------
498
+ 2021-12-31 09:10:19,492 EPOCH 10 done: loss 0.1036 - lr 0.1000000
499
+ 2021-12-31 09:10:55,557 DEV : loss 0.06536506861448288 - f1-score (micro avg) 0.9811
500
+ 2021-12-31 09:10:55,753 BAD EPOCHS (no improvement): 1
501
+ 2021-12-31 09:10:55,756 ----------------------------------------------------------------------------------------------------
502
+ 2021-12-31 09:11:12,024 epoch 11 - iter 180/1807 - loss 0.10182872 - samples/sec: 88.66 - lr: 0.100000
503
+ 2021-12-31 09:11:28,246 epoch 11 - iter 360/1807 - loss 0.10175535 - samples/sec: 88.90 - lr: 0.100000
504
+ 2021-12-31 09:11:43,844 epoch 11 - iter 540/1807 - loss 0.10107946 - samples/sec: 92.46 - lr: 0.100000
505
+ 2021-12-31 09:11:59,559 epoch 11 - iter 720/1807 - loss 0.10053922 - samples/sec: 91.77 - lr: 0.100000
506
+ 2021-12-31 09:12:15,490 epoch 11 - iter 900/1807 - loss 0.10047028 - samples/sec: 90.54 - lr: 0.100000
507
+ 2021-12-31 09:12:31,195 epoch 11 - iter 1080/1807 - loss 0.09993958 - samples/sec: 91.82 - lr: 0.100000
508
+ 2021-12-31 09:12:47,013 epoch 11 - iter 1260/1807 - loss 0.09996914 - samples/sec: 91.17 - lr: 0.100000
509
+ 2021-12-31 09:13:03,156 epoch 11 - iter 1440/1807 - loss 0.09980985 - samples/sec: 89.35 - lr: 0.100000
510
+ 2021-12-31 09:13:18,852 epoch 11 - iter 1620/1807 - loss 0.09941318 - samples/sec: 91.88 - lr: 0.100000
511
+ 2021-12-31 09:13:35,014 epoch 11 - iter 1800/1807 - loss 0.09934768 - samples/sec: 89.23 - lr: 0.100000
512
+ 2021-12-31 09:13:35,650 ----------------------------------------------------------------------------------------------------
513
+ 2021-12-31 09:13:35,650 EPOCH 11 done: loss 0.0993 - lr 0.1000000
514
+ 2021-12-31 09:14:14,419 DEV : loss 0.06659943610429764 - f1-score (micro avg) 0.9811
515
+ 2021-12-31 09:14:14,622 BAD EPOCHS (no improvement): 2
516
+ 2021-12-31 09:14:14,623 ----------------------------------------------------------------------------------------------------
517
+ 2021-12-31 09:14:30,892 epoch 12 - iter 180/1807 - loss 0.09334718 - samples/sec: 88.66 - lr: 0.100000
518
+ 2021-12-31 09:14:46,737 epoch 12 - iter 360/1807 - loss 0.09477923 - samples/sec: 91.02 - lr: 0.100000
519
+ 2021-12-31 09:15:02,926 epoch 12 - iter 540/1807 - loss 0.09677398 - samples/sec: 89.09 - lr: 0.100000
520
+ 2021-12-31 09:15:19,177 epoch 12 - iter 720/1807 - loss 0.09825518 - samples/sec: 88.74 - lr: 0.100000
521
+ 2021-12-31 09:15:34,958 epoch 12 - iter 900/1807 - loss 0.09910665 - samples/sec: 91.38 - lr: 0.100000
522
+ 2021-12-31 09:15:51,056 epoch 12 - iter 1080/1807 - loss 0.09820501 - samples/sec: 89.59 - lr: 0.100000
523
+ 2021-12-31 09:16:07,231 epoch 12 - iter 1260/1807 - loss 0.09858638 - samples/sec: 89.16 - lr: 0.100000
524
+ 2021-12-31 09:16:22,988 epoch 12 - iter 1440/1807 - loss 0.09845736 - samples/sec: 91.52 - lr: 0.100000
525
+ 2021-12-31 09:16:38,631 epoch 12 - iter 1620/1807 - loss 0.09859390 - samples/sec: 92.21 - lr: 0.100000
526
+ 2021-12-31 09:16:54,209 epoch 12 - iter 1800/1807 - loss 0.09847298 - samples/sec: 92.58 - lr: 0.100000
527
+ 2021-12-31 09:16:54,729 ----------------------------------------------------------------------------------------------------
528
+ 2021-12-31 09:16:54,730 EPOCH 12 done: loss 0.0984 - lr 0.1000000
529
+ 2021-12-31 09:17:31,308 DEV : loss 0.06410104781389236 - f1-score (micro avg) 0.9816
530
+ 2021-12-31 09:17:31,487 BAD EPOCHS (no improvement): 0
531
+ 2021-12-31 09:17:31,489 saving best model
532
+ 2021-12-31 09:17:37,260 ----------------------------------------------------------------------------------------------------
533
+ 2021-12-31 09:17:54,060 epoch 13 - iter 180/1807 - loss 0.10013605 - samples/sec: 85.86 - lr: 0.100000
534
+ 2021-12-31 09:18:09,827 epoch 13 - iter 360/1807 - loss 0.09881566 - samples/sec: 91.47 - lr: 0.100000
535
+ 2021-12-31 09:18:25,218 epoch 13 - iter 540/1807 - loss 0.09860664 - samples/sec: 93.71 - lr: 0.100000
536
+ 2021-12-31 09:18:41,246 epoch 13 - iter 720/1807 - loss 0.09768065 - samples/sec: 89.97 - lr: 0.100000
537
+ 2021-12-31 09:18:57,306 epoch 13 - iter 900/1807 - loss 0.09766501 - samples/sec: 89.79 - lr: 0.100000
538
+ 2021-12-31 09:19:12,914 epoch 13 - iter 1080/1807 - loss 0.09767968 - samples/sec: 92.41 - lr: 0.100000
539
+ 2021-12-31 09:19:29,144 epoch 13 - iter 1260/1807 - loss 0.09667902 - samples/sec: 88.86 - lr: 0.100000
540
+ 2021-12-31 09:19:45,573 epoch 13 - iter 1440/1807 - loss 0.09670686 - samples/sec: 87.78 - lr: 0.100000
541
+ 2021-12-31 09:20:01,566 epoch 13 - iter 1620/1807 - loss 0.09672936 - samples/sec: 90.18 - lr: 0.100000
542
+ 2021-12-31 09:20:17,572 epoch 13 - iter 1800/1807 - loss 0.09666135 - samples/sec: 90.10 - lr: 0.100000
543
+ 2021-12-31 09:20:18,200 ----------------------------------------------------------------------------------------------------
544
+ 2021-12-31 09:20:18,200 EPOCH 13 done: loss 0.0967 - lr 0.1000000
545
+ 2021-12-31 09:20:54,147 DEV : loss 0.06427688896656036 - f1-score (micro avg) 0.9816
546
+ 2021-12-31 09:20:54,334 BAD EPOCHS (no improvement): 1
547
+ 2021-12-31 09:20:54,335 ----------------------------------------------------------------------------------------------------
548
+ 2021-12-31 09:21:10,174 epoch 14 - iter 180/1807 - loss 0.09391481 - samples/sec: 91.06 - lr: 0.100000
549
+ 2021-12-31 09:21:26,400 epoch 14 - iter 360/1807 - loss 0.09267418 - samples/sec: 88.88 - lr: 0.100000
550
+ 2021-12-31 09:21:42,313 epoch 14 - iter 540/1807 - loss 0.09273735 - samples/sec: 90.64 - lr: 0.100000
551
+ 2021-12-31 09:21:58,477 epoch 14 - iter 720/1807 - loss 0.09237732 - samples/sec: 89.22 - lr: 0.100000
552
+ 2021-12-31 09:22:14,088 epoch 14 - iter 900/1807 - loss 0.09290387 - samples/sec: 92.38 - lr: 0.100000
553
+ 2021-12-31 09:22:29,793 epoch 14 - iter 1080/1807 - loss 0.09305725 - samples/sec: 91.82 - lr: 0.100000
554
+ 2021-12-31 09:22:45,455 epoch 14 - iter 1260/1807 - loss 0.09321173 - samples/sec: 92.09 - lr: 0.100000
555
+ 2021-12-31 09:23:01,412 epoch 14 - iter 1440/1807 - loss 0.09321459 - samples/sec: 90.38 - lr: 0.100000
556
+ 2021-12-31 09:23:17,629 epoch 14 - iter 1620/1807 - loss 0.09332877 - samples/sec: 88.93 - lr: 0.100000
557
+ 2021-12-31 09:23:33,527 epoch 14 - iter 1800/1807 - loss 0.09313892 - samples/sec: 90.71 - lr: 0.100000
558
+ 2021-12-31 09:23:34,165 ----------------------------------------------------------------------------------------------------
559
+ 2021-12-31 09:23:34,165 EPOCH 14 done: loss 0.0931 - lr 0.1000000
560
+ 2021-12-31 09:24:12,840 DEV : loss 0.06639766693115234 - f1-score (micro avg) 0.9817
561
+ 2021-12-31 09:24:13,034 BAD EPOCHS (no improvement): 0
562
+ 2021-12-31 09:24:13,036 saving best model
563
+ 2021-12-31 09:24:18,822 ----------------------------------------------------------------------------------------------------
564
+ 2021-12-31 09:24:34,568 epoch 15 - iter 180/1807 - loss 0.09134784 - samples/sec: 91.60 - lr: 0.100000
565
+ 2021-12-31 09:24:50,712 epoch 15 - iter 360/1807 - loss 0.09119751 - samples/sec: 89.33 - lr: 0.100000
566
+ 2021-12-31 09:25:07,155 epoch 15 - iter 540/1807 - loss 0.08993505 - samples/sec: 87.70 - lr: 0.100000
567
+ 2021-12-31 09:25:23,092 epoch 15 - iter 720/1807 - loss 0.09062331 - samples/sec: 90.50 - lr: 0.100000
568
+ 2021-12-31 09:25:39,643 epoch 15 - iter 900/1807 - loss 0.09054947 - samples/sec: 87.13 - lr: 0.100000
569
+ 2021-12-31 09:25:56,080 epoch 15 - iter 1080/1807 - loss 0.09120586 - samples/sec: 87.73 - lr: 0.100000
570
+ 2021-12-31 09:26:12,023 epoch 15 - iter 1260/1807 - loss 0.09202164 - samples/sec: 90.49 - lr: 0.100000
571
+ 2021-12-31 09:26:27,452 epoch 15 - iter 1440/1807 - loss 0.09257595 - samples/sec: 93.48 - lr: 0.100000
572
+ 2021-12-31 09:26:43,293 epoch 15 - iter 1620/1807 - loss 0.09296868 - samples/sec: 91.04 - lr: 0.100000
573
+ 2021-12-31 09:26:59,412 epoch 15 - iter 1800/1807 - loss 0.09272942 - samples/sec: 89.47 - lr: 0.100000
574
+ 2021-12-31 09:26:59,991 ----------------------------------------------------------------------------------------------------
575
+ 2021-12-31 09:26:59,991 EPOCH 15 done: loss 0.0927 - lr 0.1000000
576
+ 2021-12-31 09:27:36,227 DEV : loss 0.06283392012119293 - f1-score (micro avg) 0.982
577
+ 2021-12-31 09:27:36,433 BAD EPOCHS (no improvement): 0
578
+ 2021-12-31 09:27:36,435 saving best model
579
+ 2021-12-31 09:27:42,216 ----------------------------------------------------------------------------------------------------
580
+ 2021-12-31 09:27:58,274 epoch 16 - iter 180/1807 - loss 0.08868552 - samples/sec: 89.83 - lr: 0.100000
581
+ 2021-12-31 09:28:14,083 epoch 16 - iter 360/1807 - loss 0.08898795 - samples/sec: 91.23 - lr: 0.100000
582
+ 2021-12-31 09:28:30,428 epoch 16 - iter 540/1807 - loss 0.08723848 - samples/sec: 88.23 - lr: 0.100000
583
+ 2021-12-31 09:28:46,065 epoch 16 - iter 720/1807 - loss 0.08840922 - samples/sec: 92.21 - lr: 0.100000
584
+ 2021-12-31 09:29:01,697 epoch 16 - iter 900/1807 - loss 0.08907246 - samples/sec: 92.26 - lr: 0.100000
585
+ 2021-12-31 09:29:17,387 epoch 16 - iter 1080/1807 - loss 0.09016391 - samples/sec: 91.91 - lr: 0.100000
586
+ 2021-12-31 09:29:33,637 epoch 16 - iter 1260/1807 - loss 0.09090909 - samples/sec: 88.74 - lr: 0.100000
587
+ 2021-12-31 09:29:49,596 epoch 16 - iter 1440/1807 - loss 0.09079363 - samples/sec: 90.36 - lr: 0.100000
588
+ 2021-12-31 09:30:05,085 epoch 16 - iter 1620/1807 - loss 0.09144623 - samples/sec: 93.12 - lr: 0.100000
589
+ 2021-12-31 09:30:21,000 epoch 16 - iter 1800/1807 - loss 0.09062250 - samples/sec: 90.62 - lr: 0.100000
590
+ 2021-12-31 09:30:21,608 ----------------------------------------------------------------------------------------------------
591
+ 2021-12-31 09:30:21,608 EPOCH 16 done: loss 0.0906 - lr 0.1000000
592
+ 2021-12-31 09:30:58,333 DEV : loss 0.06354553997516632 - f1-score (micro avg) 0.982
593
+ 2021-12-31 09:30:58,512 BAD EPOCHS (no improvement): 1
594
+ 2021-12-31 09:30:58,514 ----------------------------------------------------------------------------------------------------
595
+ 2021-12-31 09:31:14,847 epoch 17 - iter 180/1807 - loss 0.08390522 - samples/sec: 88.30 - lr: 0.100000
596
+ 2021-12-31 09:31:30,522 epoch 17 - iter 360/1807 - loss 0.08649584 - samples/sec: 92.01 - lr: 0.100000
597
+ 2021-12-31 09:31:46,288 epoch 17 - iter 540/1807 - loss 0.08940335 - samples/sec: 91.48 - lr: 0.100000
598
+ 2021-12-31 09:32:02,118 epoch 17 - iter 720/1807 - loss 0.09059873 - samples/sec: 91.09 - lr: 0.100000
599
+ 2021-12-31 09:32:17,806 epoch 17 - iter 900/1807 - loss 0.09026440 - samples/sec: 91.93 - lr: 0.100000
600
+ 2021-12-31 09:32:33,488 epoch 17 - iter 1080/1807 - loss 0.09038711 - samples/sec: 91.96 - lr: 0.100000
601
+ 2021-12-31 09:32:49,442 epoch 17 - iter 1260/1807 - loss 0.08978670 - samples/sec: 90.39 - lr: 0.100000
602
+ 2021-12-31 09:33:05,170 epoch 17 - iter 1440/1807 - loss 0.08929018 - samples/sec: 91.69 - lr: 0.100000
603
+ 2021-12-31 09:33:21,122 epoch 17 - iter 1620/1807 - loss 0.08920206 - samples/sec: 90.40 - lr: 0.100000
604
+ 2021-12-31 09:33:36,598 epoch 17 - iter 1800/1807 - loss 0.08958801 - samples/sec: 93.18 - lr: 0.100000
605
+ 2021-12-31 09:33:37,149 ----------------------------------------------------------------------------------------------------
606
+ 2021-12-31 09:33:37,149 EPOCH 17 done: loss 0.0895 - lr 0.1000000
607
+ 2021-12-31 09:34:16,446 DEV : loss 0.06361010670661926 - f1-score (micro avg) 0.9823
608
+ 2021-12-31 09:34:16,599 BAD EPOCHS (no improvement): 0
609
+ 2021-12-31 09:34:16,600 saving best model
610
+ 2021-12-31 09:34:22,434 ----------------------------------------------------------------------------------------------------
611
+ 2021-12-31 09:34:38,419 epoch 18 - iter 180/1807 - loss 0.08343062 - samples/sec: 90.22 - lr: 0.100000
612
+ 2021-12-31 09:34:54,655 epoch 18 - iter 360/1807 - loss 0.08575852 - samples/sec: 88.82 - lr: 0.100000
613
+ 2021-12-31 09:35:10,385 epoch 18 - iter 540/1807 - loss 0.08392644 - samples/sec: 91.68 - lr: 0.100000
614
+ 2021-12-31 09:35:26,310 epoch 18 - iter 720/1807 - loss 0.08351999 - samples/sec: 90.57 - lr: 0.100000
615
+ 2021-12-31 09:35:41,876 epoch 18 - iter 900/1807 - loss 0.08509375 - samples/sec: 92.64 - lr: 0.100000
616
+ 2021-12-31 09:35:57,882 epoch 18 - iter 1080/1807 - loss 0.08493115 - samples/sec: 90.10 - lr: 0.100000
617
+ 2021-12-31 09:36:13,926 epoch 18 - iter 1260/1807 - loss 0.08609299 - samples/sec: 89.88 - lr: 0.100000
618
+ 2021-12-31 09:36:30,070 epoch 18 - iter 1440/1807 - loss 0.08644835 - samples/sec: 89.34 - lr: 0.100000
619
+ 2021-12-31 09:36:45,689 epoch 18 - iter 1620/1807 - loss 0.08698449 - samples/sec: 92.33 - lr: 0.100000
620
+ 2021-12-31 09:37:01,595 epoch 18 - iter 1800/1807 - loss 0.08715385 - samples/sec: 90.66 - lr: 0.100000
621
+ 2021-12-31 09:37:02,116 ----------------------------------------------------------------------------------------------------
622
+ 2021-12-31 09:37:02,116 EPOCH 18 done: loss 0.0872 - lr 0.1000000
623
+ 2021-12-31 09:37:38,287 DEV : loss 0.06376409530639648 - f1-score (micro avg) 0.982
624
+ 2021-12-31 09:37:38,491 BAD EPOCHS (no improvement): 1
625
+ 2021-12-31 09:37:38,492 ----------------------------------------------------------------------------------------------------
626
+ 2021-12-31 09:37:54,464 epoch 19 - iter 180/1807 - loss 0.07802257 - samples/sec: 90.31 - lr: 0.100000
627
+ 2021-12-31 09:38:10,256 epoch 19 - iter 360/1807 - loss 0.07892620 - samples/sec: 91.32 - lr: 0.100000
628
+ 2021-12-31 09:38:26,632 epoch 19 - iter 540/1807 - loss 0.08133170 - samples/sec: 88.06 - lr: 0.100000
629
+ 2021-12-31 09:38:42,673 epoch 19 - iter 720/1807 - loss 0.08367885 - samples/sec: 89.91 - lr: 0.100000
630
+ 2021-12-31 09:38:58,503 epoch 19 - iter 900/1807 - loss 0.08447871 - samples/sec: 91.11 - lr: 0.100000
631
+ 2021-12-31 09:39:14,461 epoch 19 - iter 1080/1807 - loss 0.08413767 - samples/sec: 90.37 - lr: 0.100000
632
+ 2021-12-31 09:39:30,176 epoch 19 - iter 1260/1807 - loss 0.08455665 - samples/sec: 91.77 - lr: 0.100000
633
+ 2021-12-31 09:39:46,325 epoch 19 - iter 1440/1807 - loss 0.08578599 - samples/sec: 89.30 - lr: 0.100000
634
+ 2021-12-31 09:40:02,191 epoch 19 - iter 1620/1807 - loss 0.08628902 - samples/sec: 90.90 - lr: 0.100000
635
+ 2021-12-31 09:40:18,069 epoch 19 - iter 1800/1807 - loss 0.08634962 - samples/sec: 90.82 - lr: 0.100000
636
+ 2021-12-31 09:40:18,635 ----------------------------------------------------------------------------------------------------
637
+ 2021-12-31 09:40:18,636 EPOCH 19 done: loss 0.0863 - lr 0.1000000
638
+ 2021-12-31 09:40:54,638 DEV : loss 0.06360483914613724 - f1-score (micro avg) 0.9824
639
+ 2021-12-31 09:40:54,809 BAD EPOCHS (no improvement): 0
640
+ 2021-12-31 09:40:54,812 saving best model
641
+ 2021-12-31 09:41:00,532 ----------------------------------------------------------------------------------------------------
642
+ 2021-12-31 09:41:16,605 epoch 20 - iter 180/1807 - loss 0.08580796 - samples/sec: 89.75 - lr: 0.100000
643
+ 2021-12-31 09:41:32,626 epoch 20 - iter 360/1807 - loss 0.08441046 - samples/sec: 90.02 - lr: 0.100000
644
+ 2021-12-31 09:41:48,195 epoch 20 - iter 540/1807 - loss 0.08457436 - samples/sec: 92.63 - lr: 0.100000
645
+ 2021-12-31 09:42:03,884 epoch 20 - iter 720/1807 - loss 0.08433505 - samples/sec: 91.92 - lr: 0.100000
646
+ 2021-12-31 09:42:19,662 epoch 20 - iter 900/1807 - loss 0.08465375 - samples/sec: 91.40 - lr: 0.100000
647
+ 2021-12-31 09:42:35,290 epoch 20 - iter 1080/1807 - loss 0.08384813 - samples/sec: 92.28 - lr: 0.100000
648
+ 2021-12-31 09:42:50,667 epoch 20 - iter 1260/1807 - loss 0.08437448 - samples/sec: 93.79 - lr: 0.100000
649
+ 2021-12-31 09:43:06,838 epoch 20 - iter 1440/1807 - loss 0.08483000 - samples/sec: 89.18 - lr: 0.100000
650
+ 2021-12-31 09:43:23,128 epoch 20 - iter 1620/1807 - loss 0.08554680 - samples/sec: 88.52 - lr: 0.100000
651
+ 2021-12-31 09:43:38,996 epoch 20 - iter 1800/1807 - loss 0.08579345 - samples/sec: 90.89 - lr: 0.100000
652
+ 2021-12-31 09:43:39,520 ----------------------------------------------------------------------------------------------------
653
+ 2021-12-31 09:43:39,520 EPOCH 20 done: loss 0.0858 - lr 0.1000000
654
+ 2021-12-31 09:44:18,433 DEV : loss 0.06494450569152832 - f1-score (micro avg) 0.982
655
+ 2021-12-31 09:44:18,588 BAD EPOCHS (no improvement): 1
656
+ 2021-12-31 09:44:18,590 ----------------------------------------------------------------------------------------------------
657
+ 2021-12-31 09:44:34,495 epoch 21 - iter 180/1807 - loss 0.08058450 - samples/sec: 90.65 - lr: 0.100000
658
+ 2021-12-31 09:44:50,061 epoch 21 - iter 360/1807 - loss 0.08169987 - samples/sec: 92.62 - lr: 0.100000
659
+ 2021-12-31 09:45:05,780 epoch 21 - iter 540/1807 - loss 0.08147401 - samples/sec: 91.76 - lr: 0.100000
660
+ 2021-12-31 09:45:21,869 epoch 21 - iter 720/1807 - loss 0.08235327 - samples/sec: 89.64 - lr: 0.100000
661
+ 2021-12-31 09:45:38,316 epoch 21 - iter 900/1807 - loss 0.08324710 - samples/sec: 87.67 - lr: 0.100000
662
+ 2021-12-31 09:45:54,314 epoch 21 - iter 1080/1807 - loss 0.08294963 - samples/sec: 90.14 - lr: 0.100000
663
+ 2021-12-31 09:46:10,369 epoch 21 - iter 1260/1807 - loss 0.08355307 - samples/sec: 89.83 - lr: 0.100000
664
+ 2021-12-31 09:46:26,469 epoch 21 - iter 1440/1807 - loss 0.08343050 - samples/sec: 89.57 - lr: 0.100000
665
+ 2021-12-31 09:46:42,401 epoch 21 - iter 1620/1807 - loss 0.08414815 - samples/sec: 90.52 - lr: 0.100000
666
+ 2021-12-31 09:46:58,257 epoch 21 - iter 1800/1807 - loss 0.08376554 - samples/sec: 90.95 - lr: 0.100000
667
+ 2021-12-31 09:46:58,880 ----------------------------------------------------------------------------------------------------
668
+ 2021-12-31 09:46:58,880 EPOCH 21 done: loss 0.0839 - lr 0.1000000
669
+ 2021-12-31 09:47:35,248 DEV : loss 0.06328344345092773 - f1-score (micro avg) 0.9827
670
+ 2021-12-31 09:47:35,446 BAD EPOCHS (no improvement): 0
671
+ 2021-12-31 09:47:35,448 saving best model
672
+ 2021-12-31 09:47:41,248 ----------------------------------------------------------------------------------------------------
673
+ 2021-12-31 09:47:57,255 epoch 22 - iter 180/1807 - loss 0.08050373 - samples/sec: 90.12 - lr: 0.100000
674
+ 2021-12-31 09:48:13,186 epoch 22 - iter 360/1807 - loss 0.08239139 - samples/sec: 90.52 - lr: 0.100000
675
+ 2021-12-31 09:48:29,067 epoch 22 - iter 540/1807 - loss 0.08228212 - samples/sec: 90.81 - lr: 0.100000
676
+ 2021-12-31 09:48:45,039 epoch 22 - iter 720/1807 - loss 0.08279713 - samples/sec: 90.30 - lr: 0.100000
677
+ 2021-12-31 09:49:00,510 epoch 22 - iter 900/1807 - loss 0.08334789 - samples/sec: 93.22 - lr: 0.100000
678
+ 2021-12-31 09:49:16,362 epoch 22 - iter 1080/1807 - loss 0.08342389 - samples/sec: 90.97 - lr: 0.100000
679
+ 2021-12-31 09:49:32,567 epoch 22 - iter 1260/1807 - loss 0.08349166 - samples/sec: 88.99 - lr: 0.100000
680
+ 2021-12-31 09:49:48,320 epoch 22 - iter 1440/1807 - loss 0.08427908 - samples/sec: 91.55 - lr: 0.100000
681
+ 2021-12-31 09:50:04,570 epoch 22 - iter 1620/1807 - loss 0.08465300 - samples/sec: 88.75 - lr: 0.100000
682
+ 2021-12-31 09:50:20,943 epoch 22 - iter 1800/1807 - loss 0.08437528 - samples/sec: 88.07 - lr: 0.100000
683
+ 2021-12-31 09:50:21,480 ----------------------------------------------------------------------------------------------------
684
+ 2021-12-31 09:50:21,480 EPOCH 22 done: loss 0.0844 - lr 0.1000000
685
+ 2021-12-31 09:50:58,771 DEV : loss 0.06346500664949417 - f1-score (micro avg) 0.9815
686
+ 2021-12-31 09:50:58,967 BAD EPOCHS (no improvement): 1
687
+ 2021-12-31 09:50:58,969 ----------------------------------------------------------------------------------------------------
688
+ 2021-12-31 09:51:15,272 epoch 23 - iter 180/1807 - loss 0.07857499 - samples/sec: 88.47 - lr: 0.100000
689
+ 2021-12-31 09:51:31,123 epoch 23 - iter 360/1807 - loss 0.07736816 - samples/sec: 91.00 - lr: 0.100000
690
+ 2021-12-31 09:51:47,441 epoch 23 - iter 540/1807 - loss 0.07865886 - samples/sec: 88.38 - lr: 0.100000
691
+ 2021-12-31 09:52:03,508 epoch 23 - iter 720/1807 - loss 0.08053686 - samples/sec: 89.75 - lr: 0.100000
692
+ 2021-12-31 09:52:19,618 epoch 23 - iter 900/1807 - loss 0.08084826 - samples/sec: 89.52 - lr: 0.100000
693
+ 2021-12-31 09:52:35,467 epoch 23 - iter 1080/1807 - loss 0.08116025 - samples/sec: 91.00 - lr: 0.100000
694
+ 2021-12-31 09:52:51,307 epoch 23 - iter 1260/1807 - loss 0.08137722 - samples/sec: 91.04 - lr: 0.100000
695
+ 2021-12-31 09:53:07,605 epoch 23 - iter 1440/1807 - loss 0.08168418 - samples/sec: 88.48 - lr: 0.100000
696
+ 2021-12-31 09:53:23,242 epoch 23 - iter 1620/1807 - loss 0.08161521 - samples/sec: 92.22 - lr: 0.100000
697
+ 2021-12-31 09:53:38,917 epoch 23 - iter 1800/1807 - loss 0.08147531 - samples/sec: 92.01 - lr: 0.100000
698
+ 2021-12-31 09:53:39,396 ----------------------------------------------------------------------------------------------------
699
+ 2021-12-31 09:53:39,396 EPOCH 23 done: loss 0.0814 - lr 0.1000000
700
+ 2021-12-31 09:54:15,841 DEV : loss 0.06540019810199738 - f1-score (micro avg) 0.9821
701
+ 2021-12-31 09:54:16,023 BAD EPOCHS (no improvement): 2
702
+ 2021-12-31 09:54:16,025 ----------------------------------------------------------------------------------------------------
703
+ 2021-12-31 09:54:32,334 epoch 24 - iter 180/1807 - loss 0.07795468 - samples/sec: 88.43 - lr: 0.100000
704
+ 2021-12-31 09:54:48,084 epoch 24 - iter 360/1807 - loss 0.07908717 - samples/sec: 91.57 - lr: 0.100000
705
+ 2021-12-31 09:55:04,326 epoch 24 - iter 540/1807 - loss 0.08004992 - samples/sec: 88.79 - lr: 0.100000
706
+ 2021-12-31 09:55:20,651 epoch 24 - iter 720/1807 - loss 0.08100541 - samples/sec: 88.34 - lr: 0.100000
707
+ 2021-12-31 09:55:36,785 epoch 24 - iter 900/1807 - loss 0.08142507 - samples/sec: 89.38 - lr: 0.100000
708
+ 2021-12-31 09:55:52,742 epoch 24 - iter 1080/1807 - loss 0.08232817 - samples/sec: 90.38 - lr: 0.100000
709
+ 2021-12-31 09:56:08,164 epoch 24 - iter 1260/1807 - loss 0.08188184 - samples/sec: 93.53 - lr: 0.100000
710
+ 2021-12-31 09:56:24,063 epoch 24 - iter 1440/1807 - loss 0.08243719 - samples/sec: 90.71 - lr: 0.100000
711
+ 2021-12-31 09:56:40,384 epoch 24 - iter 1620/1807 - loss 0.08222346 - samples/sec: 88.35 - lr: 0.100000
712
+ 2021-12-31 09:56:56,011 epoch 24 - iter 1800/1807 - loss 0.08229498 - samples/sec: 92.29 - lr: 0.100000
713
+ 2021-12-31 09:56:56,616 ----------------------------------------------------------------------------------------------------
714
+ 2021-12-31 09:56:56,616 EPOCH 24 done: loss 0.0822 - lr 0.1000000
715
+ 2021-12-31 09:57:35,721 DEV : loss 0.06453310698270798 - f1-score (micro avg) 0.9819
716
+ 2021-12-31 09:57:35,917 BAD EPOCHS (no improvement): 3
717
+ 2021-12-31 09:57:35,919 ----------------------------------------------------------------------------------------------------
718
+ 2021-12-31 09:57:52,048 epoch 25 - iter 180/1807 - loss 0.07765362 - samples/sec: 89.42 - lr: 0.100000
719
+ 2021-12-31 09:58:07,956 epoch 25 - iter 360/1807 - loss 0.07932940 - samples/sec: 90.65 - lr: 0.100000
720
+ 2021-12-31 09:58:23,863 epoch 25 - iter 540/1807 - loss 0.08046614 - samples/sec: 90.65 - lr: 0.100000
721
+ 2021-12-31 09:58:39,725 epoch 25 - iter 720/1807 - loss 0.07941669 - samples/sec: 90.92 - lr: 0.100000
722
+ 2021-12-31 09:58:55,303 epoch 25 - iter 900/1807 - loss 0.08092722 - samples/sec: 92.57 - lr: 0.100000
723
+ 2021-12-31 09:59:11,794 epoch 25 - iter 1080/1807 - loss 0.08150485 - samples/sec: 87.44 - lr: 0.100000
724
+ 2021-12-31 09:59:27,795 epoch 25 - iter 1260/1807 - loss 0.08118184 - samples/sec: 90.13 - lr: 0.100000
725
+ 2021-12-31 09:59:43,595 epoch 25 - iter 1440/1807 - loss 0.08068256 - samples/sec: 91.28 - lr: 0.100000
726
+ 2021-12-31 09:59:59,146 epoch 25 - iter 1620/1807 - loss 0.08113371 - samples/sec: 92.74 - lr: 0.100000
727
+ 2021-12-31 10:00:14,684 epoch 25 - iter 1800/1807 - loss 0.08112289 - samples/sec: 92.81 - lr: 0.100000
728
+ 2021-12-31 10:00:15,230 ----------------------------------------------------------------------------------------------------
729
+ 2021-12-31 10:00:15,230 EPOCH 25 done: loss 0.0812 - lr 0.1000000
730
+ 2021-12-31 10:00:51,681 DEV : loss 0.06579063087701797 - f1-score (micro avg) 0.9817
731
+ 2021-12-31 10:00:51,872 BAD EPOCHS (no improvement): 4
732
+ 2021-12-31 10:00:51,874 ----------------------------------------------------------------------------------------------------
733
+ 2021-12-31 10:01:08,252 epoch 26 - iter 180/1807 - loss 0.07473820 - samples/sec: 88.06 - lr: 0.050000
734
+ 2021-12-31 10:01:24,095 epoch 26 - iter 360/1807 - loss 0.07741051 - samples/sec: 91.03 - lr: 0.050000
735
+ 2021-12-31 10:01:40,042 epoch 26 - iter 540/1807 - loss 0.07612793 - samples/sec: 90.43 - lr: 0.050000
736
+ 2021-12-31 10:01:55,977 epoch 26 - iter 720/1807 - loss 0.07597233 - samples/sec: 90.49 - lr: 0.050000
737
+ 2021-12-31 10:02:12,264 epoch 26 - iter 900/1807 - loss 0.07560347 - samples/sec: 88.55 - lr: 0.050000
738
+ 2021-12-31 10:02:28,030 epoch 26 - iter 1080/1807 - loss 0.07626889 - samples/sec: 91.47 - lr: 0.050000
739
+ 2021-12-31 10:02:43,691 epoch 26 - iter 1260/1807 - loss 0.07613186 - samples/sec: 92.08 - lr: 0.050000
740
+ 2021-12-31 10:02:59,223 epoch 26 - iter 1440/1807 - loss 0.07558384 - samples/sec: 92.85 - lr: 0.050000
741
+ 2021-12-31 10:03:15,259 epoch 26 - iter 1620/1807 - loss 0.07503334 - samples/sec: 89.93 - lr: 0.050000
742
+ 2021-12-31 10:03:31,614 epoch 26 - iter 1800/1807 - loss 0.07448614 - samples/sec: 88.18 - lr: 0.050000
743
+ 2021-12-31 10:03:32,151 ----------------------------------------------------------------------------------------------------
744
+ 2021-12-31 10:03:32,151 EPOCH 26 done: loss 0.0744 - lr 0.0500000
745
+ 2021-12-31 10:04:08,767 DEV : loss 0.06646668165922165 - f1-score (micro avg) 0.9822
746
+ 2021-12-31 10:04:08,949 BAD EPOCHS (no improvement): 1
747
+ 2021-12-31 10:04:08,950 ----------------------------------------------------------------------------------------------------
748
+ 2021-12-31 10:04:25,529 epoch 27 - iter 180/1807 - loss 0.06581114 - samples/sec: 86.99 - lr: 0.050000
749
+ 2021-12-31 10:04:41,436 epoch 27 - iter 360/1807 - loss 0.06857834 - samples/sec: 90.66 - lr: 0.050000
750
+ 2021-12-31 10:04:57,191 epoch 27 - iter 540/1807 - loss 0.07081005 - samples/sec: 91.54 - lr: 0.050000
751
+ 2021-12-31 10:05:13,183 epoch 27 - iter 720/1807 - loss 0.07198836 - samples/sec: 90.18 - lr: 0.050000
752
+ 2021-12-31 10:05:29,131 epoch 27 - iter 900/1807 - loss 0.07153264 - samples/sec: 90.42 - lr: 0.050000
753
+ 2021-12-31 10:05:44,864 epoch 27 - iter 1080/1807 - loss 0.07164274 - samples/sec: 91.66 - lr: 0.050000
754
+ 2021-12-31 10:06:00,643 epoch 27 - iter 1260/1807 - loss 0.07167991 - samples/sec: 91.40 - lr: 0.050000
755
+ 2021-12-31 10:06:15,929 epoch 27 - iter 1440/1807 - loss 0.07130117 - samples/sec: 94.34 - lr: 0.050000
756
+ 2021-12-31 10:06:32,208 epoch 27 - iter 1620/1807 - loss 0.07137995 - samples/sec: 88.59 - lr: 0.050000
757
+ 2021-12-31 10:06:48,072 epoch 27 - iter 1800/1807 - loss 0.07123898 - samples/sec: 90.90 - lr: 0.050000
758
+ 2021-12-31 10:06:48,616 ----------------------------------------------------------------------------------------------------
759
+ 2021-12-31 10:06:48,616 EPOCH 27 done: loss 0.0712 - lr 0.0500000
760
+ 2021-12-31 10:07:27,769 DEV : loss 0.06514652073383331 - f1-score (micro avg) 0.9823
761
+ 2021-12-31 10:07:27,967 BAD EPOCHS (no improvement): 2
762
+ 2021-12-31 10:07:27,968 ----------------------------------------------------------------------------------------------------
763
+ 2021-12-31 10:07:43,921 epoch 28 - iter 180/1807 - loss 0.06865415 - samples/sec: 90.41 - lr: 0.050000
764
+ 2021-12-31 10:08:00,073 epoch 28 - iter 360/1807 - loss 0.06855531 - samples/sec: 89.28 - lr: 0.050000
765
+ 2021-12-31 10:08:16,259 epoch 28 - iter 540/1807 - loss 0.06891820 - samples/sec: 89.09 - lr: 0.050000
766
+ 2021-12-31 10:08:31,981 epoch 28 - iter 720/1807 - loss 0.06951336 - samples/sec: 91.73 - lr: 0.050000
767
+ 2021-12-31 10:08:47,429 epoch 28 - iter 900/1807 - loss 0.07014278 - samples/sec: 93.35 - lr: 0.050000
768
+ 2021-12-31 10:09:03,024 epoch 28 - iter 1080/1807 - loss 0.07071541 - samples/sec: 92.47 - lr: 0.050000
769
+ 2021-12-31 10:09:18,974 epoch 28 - iter 1260/1807 - loss 0.07012373 - samples/sec: 90.41 - lr: 0.050000
770
+ 2021-12-31 10:09:34,620 epoch 28 - iter 1440/1807 - loss 0.07028479 - samples/sec: 92.17 - lr: 0.050000
771
+ 2021-12-31 10:09:50,427 epoch 28 - iter 1620/1807 - loss 0.07017402 - samples/sec: 91.23 - lr: 0.050000
772
+ 2021-12-31 10:10:05,997 epoch 28 - iter 1800/1807 - loss 0.07002142 - samples/sec: 92.62 - lr: 0.050000
773
+ 2021-12-31 10:10:06,547 ----------------------------------------------------------------------------------------------------
774
+ 2021-12-31 10:10:06,548 EPOCH 28 done: loss 0.0701 - lr 0.0500000
775
+ 2021-12-31 10:10:43,342 DEV : loss 0.06285692006349564 - f1-score (micro avg) 0.9828
776
+ 2021-12-31 10:10:43,549 BAD EPOCHS (no improvement): 0
777
+ 2021-12-31 10:10:43,550 saving best model
778
+ 2021-12-31 10:10:49,346 ----------------------------------------------------------------------------------------------------
779
+ 2021-12-31 10:11:05,893 epoch 29 - iter 180/1807 - loss 0.06749112 - samples/sec: 87.17 - lr: 0.050000
780
+ 2021-12-31 10:11:21,660 epoch 29 - iter 360/1807 - loss 0.06704871 - samples/sec: 91.46 - lr: 0.050000
781
+ 2021-12-31 10:11:37,404 epoch 29 - iter 540/1807 - loss 0.06846136 - samples/sec: 91.60 - lr: 0.050000
782
+ 2021-12-31 10:11:53,397 epoch 29 - iter 720/1807 - loss 0.06901632 - samples/sec: 90.17 - lr: 0.050000
783
+ 2021-12-31 10:12:09,257 epoch 29 - iter 900/1807 - loss 0.06809349 - samples/sec: 90.93 - lr: 0.050000
784
+ 2021-12-31 10:12:24,599 epoch 29 - iter 1080/1807 - loss 0.06824897 - samples/sec: 94.00 - lr: 0.050000
785
+ 2021-12-31 10:12:40,447 epoch 29 - iter 1260/1807 - loss 0.06782382 - samples/sec: 91.00 - lr: 0.050000
786
+ 2021-12-31 10:12:56,595 epoch 29 - iter 1440/1807 - loss 0.06808796 - samples/sec: 89.30 - lr: 0.050000
787
+ 2021-12-31 10:13:12,755 epoch 29 - iter 1620/1807 - loss 0.06798634 - samples/sec: 89.24 - lr: 0.050000
788
+ 2021-12-31 10:13:28,701 epoch 29 - iter 1800/1807 - loss 0.06777472 - samples/sec: 90.44 - lr: 0.050000
789
+ 2021-12-31 10:13:29,227 ----------------------------------------------------------------------------------------------------
790
+ 2021-12-31 10:13:29,228 EPOCH 29 done: loss 0.0678 - lr 0.0500000
791
+ 2021-12-31 10:14:05,041 DEV : loss 0.06288447976112366 - f1-score (micro avg) 0.9831
792
+ 2021-12-31 10:14:05,221 BAD EPOCHS (no improvement): 0
793
+ 2021-12-31 10:14:05,222 saving best model
794
+ 2021-12-31 10:14:10,675 ----------------------------------------------------------------------------------------------------
795
+ 2021-12-31 10:14:26,845 epoch 30 - iter 180/1807 - loss 0.06615046 - samples/sec: 89.20 - lr: 0.050000
796
+ 2021-12-31 10:14:42,781 epoch 30 - iter 360/1807 - loss 0.06701908 - samples/sec: 90.50 - lr: 0.050000
797
+ 2021-12-31 10:14:58,746 epoch 30 - iter 540/1807 - loss 0.06748578 - samples/sec: 90.33 - lr: 0.050000
798
+ 2021-12-31 10:15:14,479 epoch 30 - iter 720/1807 - loss 0.06796474 - samples/sec: 91.66 - lr: 0.050000
799
+ 2021-12-31 10:15:30,280 epoch 30 - iter 900/1807 - loss 0.06739311 - samples/sec: 91.26 - lr: 0.050000
800
+ 2021-12-31 10:15:45,933 epoch 30 - iter 1080/1807 - loss 0.06699810 - samples/sec: 92.13 - lr: 0.050000
801
+ 2021-12-31 10:16:01,690 epoch 30 - iter 1260/1807 - loss 0.06745951 - samples/sec: 91.53 - lr: 0.050000
802
+ 2021-12-31 10:16:17,453 epoch 30 - iter 1440/1807 - loss 0.06704309 - samples/sec: 91.49 - lr: 0.050000
803
+ 2021-12-31 10:16:33,233 epoch 30 - iter 1620/1807 - loss 0.06649743 - samples/sec: 91.38 - lr: 0.050000
804
+ 2021-12-31 10:16:49,143 epoch 30 - iter 1800/1807 - loss 0.06655280 - samples/sec: 90.65 - lr: 0.050000
805
+ 2021-12-31 10:16:49,685 ----------------------------------------------------------------------------------------------------
806
+ 2021-12-31 10:16:49,685 EPOCH 30 done: loss 0.0666 - lr 0.0500000
807
+ 2021-12-31 10:17:28,240 DEV : loss 0.06311798095703125 - f1-score (micro avg) 0.9824
808
+ 2021-12-31 10:17:28,433 BAD EPOCHS (no improvement): 1
809
+ 2021-12-31 10:17:28,434 ----------------------------------------------------------------------------------------------------
810
+ 2021-12-31 10:17:44,966 epoch 31 - iter 180/1807 - loss 0.06627745 - samples/sec: 87.24 - lr: 0.050000
811
+ 2021-12-31 10:18:00,662 epoch 31 - iter 360/1807 - loss 0.06286711 - samples/sec: 91.88 - lr: 0.050000
812
+ 2021-12-31 10:18:16,307 epoch 31 - iter 540/1807 - loss 0.06454841 - samples/sec: 92.17 - lr: 0.050000
813
+ 2021-12-31 10:18:32,243 epoch 31 - iter 720/1807 - loss 0.06465161 - samples/sec: 90.50 - lr: 0.050000
814
+ 2021-12-31 10:18:47,799 epoch 31 - iter 900/1807 - loss 0.06488043 - samples/sec: 92.70 - lr: 0.050000
815
+ 2021-12-31 10:19:03,602 epoch 31 - iter 1080/1807 - loss 0.06501278 - samples/sec: 91.26 - lr: 0.050000
816
+ 2021-12-31 10:19:19,610 epoch 31 - iter 1260/1807 - loss 0.06524649 - samples/sec: 90.08 - lr: 0.050000
817
+ 2021-12-31 10:19:35,038 epoch 31 - iter 1440/1807 - loss 0.06554492 - samples/sec: 93.48 - lr: 0.050000
818
+ 2021-12-31 10:19:51,164 epoch 31 - iter 1620/1807 - loss 0.06599922 - samples/sec: 89.43 - lr: 0.050000
819
+ 2021-12-31 10:20:07,078 epoch 31 - iter 1800/1807 - loss 0.06644678 - samples/sec: 90.61 - lr: 0.050000
820
+ 2021-12-31 10:20:07,640 ----------------------------------------------------------------------------------------------------
821
+ 2021-12-31 10:20:07,640 EPOCH 31 done: loss 0.0666 - lr 0.0500000
822
+ 2021-12-31 10:20:43,927 DEV : loss 0.06285466253757477 - f1-score (micro avg) 0.9829
823
+ 2021-12-31 10:20:44,123 BAD EPOCHS (no improvement): 2
824
+ 2021-12-31 10:20:44,125 ----------------------------------------------------------------------------------------------------
825
+ 2021-12-31 10:21:00,298 epoch 32 - iter 180/1807 - loss 0.06077116 - samples/sec: 89.18 - lr: 0.050000
826
+ 2021-12-31 10:21:16,393 epoch 32 - iter 360/1807 - loss 0.06270324 - samples/sec: 89.60 - lr: 0.050000
827
+ 2021-12-31 10:21:32,158 epoch 32 - iter 540/1807 - loss 0.06340224 - samples/sec: 91.47 - lr: 0.050000
828
+ 2021-12-31 10:21:48,183 epoch 32 - iter 720/1807 - loss 0.06267842 - samples/sec: 89.99 - lr: 0.050000
829
+ 2021-12-31 10:22:03,949 epoch 32 - iter 900/1807 - loss 0.06345792 - samples/sec: 91.50 - lr: 0.050000
830
+ 2021-12-31 10:22:19,674 epoch 32 - iter 1080/1807 - loss 0.06439376 - samples/sec: 91.71 - lr: 0.050000
831
+ 2021-12-31 10:22:35,414 epoch 32 - iter 1260/1807 - loss 0.06437464 - samples/sec: 91.63 - lr: 0.050000
832
+ 2021-12-31 10:22:51,702 epoch 32 - iter 1440/1807 - loss 0.06435182 - samples/sec: 88.53 - lr: 0.050000
833
+ 2021-12-31 10:23:07,918 epoch 32 - iter 1620/1807 - loss 0.06467809 - samples/sec: 88.93 - lr: 0.050000
834
+ 2021-12-31 10:23:23,880 epoch 32 - iter 1800/1807 - loss 0.06484923 - samples/sec: 90.35 - lr: 0.050000
835
+ 2021-12-31 10:23:24,513 ----------------------------------------------------------------------------------------------------
836
+ 2021-12-31 10:23:24,513 EPOCH 32 done: loss 0.0648 - lr 0.0500000
837
+ 2021-12-31 10:24:00,678 DEV : loss 0.062373436987400055 - f1-score (micro avg) 0.9827
838
+ 2021-12-31 10:24:00,863 BAD EPOCHS (no improvement): 3
839
+ 2021-12-31 10:24:00,865 ----------------------------------------------------------------------------------------------------
840
+ 2021-12-31 10:24:17,368 epoch 33 - iter 180/1807 - loss 0.06511517 - samples/sec: 87.39 - lr: 0.050000
841
+ 2021-12-31 10:24:33,869 epoch 33 - iter 360/1807 - loss 0.06359714 - samples/sec: 87.39 - lr: 0.050000
842
+ 2021-12-31 10:24:49,974 epoch 33 - iter 540/1807 - loss 0.06324776 - samples/sec: 89.54 - lr: 0.050000
843
+ 2021-12-31 10:25:05,411 epoch 33 - iter 720/1807 - loss 0.06296883 - samples/sec: 93.42 - lr: 0.050000
844
+ 2021-12-31 10:25:21,477 epoch 33 - iter 900/1807 - loss 0.06304943 - samples/sec: 89.76 - lr: 0.050000
845
+ 2021-12-31 10:25:37,062 epoch 33 - iter 1080/1807 - loss 0.06266940 - samples/sec: 92.52 - lr: 0.050000
846
+ 2021-12-31 10:25:52,743 epoch 33 - iter 1260/1807 - loss 0.06359599 - samples/sec: 91.97 - lr: 0.050000
847
+ 2021-12-31 10:26:08,521 epoch 33 - iter 1440/1807 - loss 0.06353058 - samples/sec: 91.40 - lr: 0.050000
848
+ 2021-12-31 10:26:24,080 epoch 33 - iter 1620/1807 - loss 0.06366170 - samples/sec: 92.69 - lr: 0.050000
849
+ 2021-12-31 10:26:39,568 epoch 33 - iter 1800/1807 - loss 0.06405823 - samples/sec: 93.11 - lr: 0.050000
850
+ 2021-12-31 10:26:40,121 ----------------------------------------------------------------------------------------------------
851
+ 2021-12-31 10:26:40,121 EPOCH 33 done: loss 0.0640 - lr 0.0500000
852
+ 2021-12-31 10:27:18,678 DEV : loss 0.06352584064006805 - f1-score (micro avg) 0.983
853
+ 2021-12-31 10:27:18,875 BAD EPOCHS (no improvement): 4
854
+ 2021-12-31 10:27:18,877 ----------------------------------------------------------------------------------------------------
855
+ 2021-12-31 10:27:34,632 epoch 34 - iter 180/1807 - loss 0.05738992 - samples/sec: 91.55 - lr: 0.025000
856
+ 2021-12-31 10:27:50,783 epoch 34 - iter 360/1807 - loss 0.05964139 - samples/sec: 89.29 - lr: 0.025000
857
+ 2021-12-31 10:28:06,956 epoch 34 - iter 540/1807 - loss 0.05950577 - samples/sec: 89.16 - lr: 0.025000
858
+ 2021-12-31 10:28:23,264 epoch 34 - iter 720/1807 - loss 0.06033373 - samples/sec: 88.43 - lr: 0.025000
859
+ 2021-12-31 10:28:38,762 epoch 34 - iter 900/1807 - loss 0.06053852 - samples/sec: 93.06 - lr: 0.025000
860
+ 2021-12-31 10:28:54,790 epoch 34 - iter 1080/1807 - loss 0.06008683 - samples/sec: 89.97 - lr: 0.025000
861
+ 2021-12-31 10:29:10,752 epoch 34 - iter 1260/1807 - loss 0.06017032 - samples/sec: 90.34 - lr: 0.025000
862
+ 2021-12-31 10:29:26,533 epoch 34 - iter 1440/1807 - loss 0.06026720 - samples/sec: 91.39 - lr: 0.025000
863
+ 2021-12-31 10:29:41,962 epoch 34 - iter 1620/1807 - loss 0.06023939 - samples/sec: 93.47 - lr: 0.025000
864
+ 2021-12-31 10:29:57,974 epoch 34 - iter 1800/1807 - loss 0.06024915 - samples/sec: 90.06 - lr: 0.025000
865
+ 2021-12-31 10:29:58,641 ----------------------------------------------------------------------------------------------------
866
+ 2021-12-31 10:29:58,642 EPOCH 34 done: loss 0.0602 - lr 0.0250000
867
+ 2021-12-31 10:30:34,901 DEV : loss 0.06348917633295059 - f1-score (micro avg) 0.9835
868
+ 2021-12-31 10:30:35,087 BAD EPOCHS (no improvement): 0
869
+ 2021-12-31 10:30:35,089 saving best model
870
+ 2021-12-31 10:30:40,883 ----------------------------------------------------------------------------------------------------
871
+ 2021-12-31 10:30:57,202 epoch 35 - iter 180/1807 - loss 0.05878333 - samples/sec: 88.38 - lr: 0.025000
872
+ 2021-12-31 10:31:12,996 epoch 35 - iter 360/1807 - loss 0.05795906 - samples/sec: 91.32 - lr: 0.025000
873
+ 2021-12-31 10:31:29,079 epoch 35 - iter 540/1807 - loss 0.05935994 - samples/sec: 89.67 - lr: 0.025000
874
+ 2021-12-31 10:31:45,084 epoch 35 - iter 720/1807 - loss 0.05982168 - samples/sec: 90.10 - lr: 0.025000
875
+ 2021-12-31 10:32:00,692 epoch 35 - iter 900/1807 - loss 0.05928538 - samples/sec: 92.39 - lr: 0.025000
876
+ 2021-12-31 10:32:16,615 epoch 35 - iter 1080/1807 - loss 0.05961166 - samples/sec: 90.58 - lr: 0.025000
877
+ 2021-12-31 10:32:32,475 epoch 35 - iter 1260/1807 - loss 0.06019352 - samples/sec: 90.93 - lr: 0.025000
878
+ 2021-12-31 10:32:48,494 epoch 35 - iter 1440/1807 - loss 0.06020781 - samples/sec: 90.02 - lr: 0.025000
879
+ 2021-12-31 10:33:04,244 epoch 35 - iter 1620/1807 - loss 0.05999299 - samples/sec: 91.57 - lr: 0.025000
880
+ 2021-12-31 10:33:20,684 epoch 35 - iter 1800/1807 - loss 0.05998842 - samples/sec: 87.72 - lr: 0.025000
881
+ 2021-12-31 10:33:21,238 ----------------------------------------------------------------------------------------------------
882
+ 2021-12-31 10:33:21,238 EPOCH 35 done: loss 0.0600 - lr 0.0250000
883
+ 2021-12-31 10:33:57,434 DEV : loss 0.06338120251893997 - f1-score (micro avg) 0.9829
884
+ 2021-12-31 10:33:57,624 BAD EPOCHS (no improvement): 1
885
+ 2021-12-31 10:33:57,626 ----------------------------------------------------------------------------------------------------
886
+ 2021-12-31 10:34:13,768 epoch 36 - iter 180/1807 - loss 0.06028850 - samples/sec: 89.35 - lr: 0.025000
887
+ 2021-12-31 10:34:29,556 epoch 36 - iter 360/1807 - loss 0.05827195 - samples/sec: 91.34 - lr: 0.025000
888
+ 2021-12-31 10:34:46,060 epoch 36 - iter 540/1807 - loss 0.05947832 - samples/sec: 87.38 - lr: 0.025000
889
+ 2021-12-31 10:35:02,018 epoch 36 - iter 720/1807 - loss 0.05898679 - samples/sec: 90.38 - lr: 0.025000
890
+ 2021-12-31 10:35:18,203 epoch 36 - iter 900/1807 - loss 0.05910041 - samples/sec: 89.10 - lr: 0.025000
891
+ 2021-12-31 10:35:34,254 epoch 36 - iter 1080/1807 - loss 0.05973540 - samples/sec: 89.84 - lr: 0.025000
892
+ 2021-12-31 10:35:50,256 epoch 36 - iter 1260/1807 - loss 0.05924335 - samples/sec: 90.13 - lr: 0.025000
893
+ 2021-12-31 10:36:06,236 epoch 36 - iter 1440/1807 - loss 0.05881263 - samples/sec: 90.25 - lr: 0.025000
894
+ 2021-12-31 10:36:22,117 epoch 36 - iter 1620/1807 - loss 0.05885928 - samples/sec: 90.80 - lr: 0.025000
895
+ 2021-12-31 10:36:38,208 epoch 36 - iter 1800/1807 - loss 0.05867245 - samples/sec: 89.62 - lr: 0.025000
896
+ 2021-12-31 10:36:38,763 ----------------------------------------------------------------------------------------------------
897
+ 2021-12-31 10:36:38,763 EPOCH 36 done: loss 0.0587 - lr 0.0250000
898
+ 2021-12-31 10:37:17,552 DEV : loss 0.06424003839492798 - f1-score (micro avg) 0.9835
899
+ 2021-12-31 10:37:17,751 BAD EPOCHS (no improvement): 2
900
+ 2021-12-31 10:37:17,752 ----------------------------------------------------------------------------------------------------
901
+ 2021-12-31 10:37:33,804 epoch 37 - iter 180/1807 - loss 0.05692650 - samples/sec: 89.85 - lr: 0.025000
902
+ 2021-12-31 10:37:50,368 epoch 37 - iter 360/1807 - loss 0.05616469 - samples/sec: 87.06 - lr: 0.025000
903
+ 2021-12-31 10:38:06,389 epoch 37 - iter 540/1807 - loss 0.05662717 - samples/sec: 90.01 - lr: 0.025000
904
+ 2021-12-31 10:38:22,399 epoch 37 - iter 720/1807 - loss 0.05716632 - samples/sec: 90.08 - lr: 0.025000
905
+ 2021-12-31 10:38:37,783 epoch 37 - iter 900/1807 - loss 0.05713545 - samples/sec: 93.74 - lr: 0.025000
906
+ 2021-12-31 10:38:53,871 epoch 37 - iter 1080/1807 - loss 0.05764661 - samples/sec: 89.64 - lr: 0.025000
907
+ 2021-12-31 10:39:10,031 epoch 37 - iter 1260/1807 - loss 0.05713711 - samples/sec: 89.23 - lr: 0.025000
908
+ 2021-12-31 10:39:25,737 epoch 37 - iter 1440/1807 - loss 0.05769197 - samples/sec: 91.83 - lr: 0.025000
909
+ 2021-12-31 10:39:41,486 epoch 37 - iter 1620/1807 - loss 0.05788084 - samples/sec: 91.57 - lr: 0.025000
910
+ 2021-12-31 10:39:57,218 epoch 37 - iter 1800/1807 - loss 0.05864320 - samples/sec: 91.67 - lr: 0.025000
911
+ 2021-12-31 10:39:57,747 ----------------------------------------------------------------------------------------------------
912
+ 2021-12-31 10:39:57,748 EPOCH 37 done: loss 0.0586 - lr 0.0250000
913
+ 2021-12-31 10:40:34,869 DEV : loss 0.06326954811811447 - f1-score (micro avg) 0.9831
914
+ 2021-12-31 10:40:35,052 BAD EPOCHS (no improvement): 3
915
+ 2021-12-31 10:40:35,054 ----------------------------------------------------------------------------------------------------
916
+ 2021-12-31 10:40:51,312 epoch 38 - iter 180/1807 - loss 0.05496563 - samples/sec: 88.71 - lr: 0.025000
917
+ 2021-12-31 10:41:07,088 epoch 38 - iter 360/1807 - loss 0.05435886 - samples/sec: 91.42 - lr: 0.025000
918
+ 2021-12-31 10:41:22,841 epoch 38 - iter 540/1807 - loss 0.05464384 - samples/sec: 91.55 - lr: 0.025000
919
+ 2021-12-31 10:41:38,398 epoch 38 - iter 720/1807 - loss 0.05548335 - samples/sec: 92.69 - lr: 0.025000
920
+ 2021-12-31 10:41:54,754 epoch 38 - iter 900/1807 - loss 0.05628518 - samples/sec: 88.18 - lr: 0.025000
921
+ 2021-12-31 10:42:10,229 epoch 38 - iter 1080/1807 - loss 0.05604961 - samples/sec: 93.19 - lr: 0.025000
922
+ 2021-12-31 10:42:26,417 epoch 38 - iter 1260/1807 - loss 0.05594531 - samples/sec: 89.09 - lr: 0.025000
923
+ 2021-12-31 10:42:42,839 epoch 38 - iter 1440/1807 - loss 0.05651329 - samples/sec: 87.81 - lr: 0.025000
924
+ 2021-12-31 10:42:58,889 epoch 38 - iter 1620/1807 - loss 0.05695998 - samples/sec: 89.85 - lr: 0.025000
925
+ 2021-12-31 10:43:15,043 epoch 38 - iter 1800/1807 - loss 0.05706783 - samples/sec: 89.27 - lr: 0.025000
926
+ 2021-12-31 10:43:15,590 ----------------------------------------------------------------------------------------------------
927
+ 2021-12-31 10:43:15,590 EPOCH 38 done: loss 0.0570 - lr 0.0250000
928
+ 2021-12-31 10:43:52,423 DEV : loss 0.06343492120504379 - f1-score (micro avg) 0.9831
929
+ 2021-12-31 10:43:52,610 BAD EPOCHS (no improvement): 4
930
+ 2021-12-31 10:43:52,612 ----------------------------------------------------------------------------------------------------
931
+ 2021-12-31 10:44:08,739 epoch 39 - iter 180/1807 - loss 0.05834451 - samples/sec: 89.43 - lr: 0.012500
932
+ 2021-12-31 10:44:24,462 epoch 39 - iter 360/1807 - loss 0.05496382 - samples/sec: 91.72 - lr: 0.012500
933
+ 2021-12-31 10:44:40,570 epoch 39 - iter 540/1807 - loss 0.05537094 - samples/sec: 89.53 - lr: 0.012500
934
+ 2021-12-31 10:44:56,434 epoch 39 - iter 720/1807 - loss 0.05546561 - samples/sec: 90.90 - lr: 0.012500
935
+ 2021-12-31 10:45:12,338 epoch 39 - iter 900/1807 - loss 0.05527723 - samples/sec: 90.67 - lr: 0.012500
936
+ 2021-12-31 10:45:27,903 epoch 39 - iter 1080/1807 - loss 0.05518412 - samples/sec: 92.65 - lr: 0.012500
937
+ 2021-12-31 10:45:43,777 epoch 39 - iter 1260/1807 - loss 0.05540916 - samples/sec: 90.86 - lr: 0.012500
938
+ 2021-12-31 10:45:59,259 epoch 39 - iter 1440/1807 - loss 0.05568263 - samples/sec: 93.15 - lr: 0.012500
939
+ 2021-12-31 10:46:15,024 epoch 39 - iter 1620/1807 - loss 0.05532678 - samples/sec: 91.47 - lr: 0.012500
940
+ 2021-12-31 10:46:30,975 epoch 39 - iter 1800/1807 - loss 0.05524694 - samples/sec: 90.40 - lr: 0.012500
941
+ 2021-12-31 10:46:31,584 ----------------------------------------------------------------------------------------------------
942
+ 2021-12-31 10:46:31,585 EPOCH 39 done: loss 0.0552 - lr 0.0125000
943
+ 2021-12-31 10:47:10,908 DEV : loss 0.06419230252504349 - f1-score (micro avg) 0.9829
944
+ 2021-12-31 10:47:11,105 BAD EPOCHS (no improvement): 1
945
+ 2021-12-31 10:47:11,106 ----------------------------------------------------------------------------------------------------
946
+ 2021-12-31 10:47:26,949 epoch 40 - iter 180/1807 - loss 0.05824543 - samples/sec: 91.06 - lr: 0.012500
947
+ 2021-12-31 10:47:42,913 epoch 40 - iter 360/1807 - loss 0.05527233 - samples/sec: 90.33 - lr: 0.012500
948
+ 2021-12-31 10:47:59,224 epoch 40 - iter 540/1807 - loss 0.05570769 - samples/sec: 88.41 - lr: 0.012500
949
+ 2021-12-31 10:48:14,703 epoch 40 - iter 720/1807 - loss 0.05485811 - samples/sec: 93.17 - lr: 0.012500
950
+ 2021-12-31 10:48:30,458 epoch 40 - iter 900/1807 - loss 0.05502772 - samples/sec: 91.54 - lr: 0.012500
951
+ 2021-12-31 10:48:46,369 epoch 40 - iter 1080/1807 - loss 0.05487373 - samples/sec: 90.63 - lr: 0.012500
952
+ 2021-12-31 10:49:01,734 epoch 40 - iter 1260/1807 - loss 0.05438047 - samples/sec: 93.85 - lr: 0.012500
953
+ 2021-12-31 10:49:17,649 epoch 40 - iter 1440/1807 - loss 0.05459548 - samples/sec: 90.61 - lr: 0.012500
954
+ 2021-12-31 10:49:33,390 epoch 40 - iter 1620/1807 - loss 0.05450567 - samples/sec: 91.62 - lr: 0.012500
955
+ 2021-12-31 10:49:49,353 epoch 40 - iter 1800/1807 - loss 0.05462945 - samples/sec: 90.34 - lr: 0.012500
956
+ 2021-12-31 10:49:49,959 ----------------------------------------------------------------------------------------------------
957
+ 2021-12-31 10:49:49,959 EPOCH 40 done: loss 0.0546 - lr 0.0125000
958
+ 2021-12-31 10:50:26,216 DEV : loss 0.06343018263578415 - f1-score (micro avg) 0.9829
959
+ 2021-12-31 10:50:26,401 BAD EPOCHS (no improvement): 2
960
+ 2021-12-31 10:50:26,402 ----------------------------------------------------------------------------------------------------
961
+ 2021-12-31 10:50:42,801 epoch 41 - iter 180/1807 - loss 0.04923909 - samples/sec: 87.95 - lr: 0.012500
962
+ 2021-12-31 10:50:58,898 epoch 41 - iter 360/1807 - loss 0.05125288 - samples/sec: 89.59 - lr: 0.012500
963
+ 2021-12-31 10:51:14,501 epoch 41 - iter 540/1807 - loss 0.05242298 - samples/sec: 92.43 - lr: 0.012500
964
+ 2021-12-31 10:51:30,244 epoch 41 - iter 720/1807 - loss 0.05272643 - samples/sec: 91.60 - lr: 0.012500
965
+ 2021-12-31 10:51:46,266 epoch 41 - iter 900/1807 - loss 0.05277145 - samples/sec: 90.01 - lr: 0.012500
966
+ 2021-12-31 10:52:02,535 epoch 41 - iter 1080/1807 - loss 0.05329680 - samples/sec: 88.64 - lr: 0.012500
967
+ 2021-12-31 10:52:18,362 epoch 41 - iter 1260/1807 - loss 0.05349535 - samples/sec: 91.12 - lr: 0.012500
968
+ 2021-12-31 10:52:34,324 epoch 41 - iter 1440/1807 - loss 0.05371268 - samples/sec: 90.35 - lr: 0.012500
969
+ 2021-12-31 10:52:50,154 epoch 41 - iter 1620/1807 - loss 0.05362217 - samples/sec: 91.09 - lr: 0.012500
970
+ 2021-12-31 10:53:06,114 epoch 41 - iter 1800/1807 - loss 0.05361560 - samples/sec: 90.36 - lr: 0.012500
971
+ 2021-12-31 10:53:06,648 ----------------------------------------------------------------------------------------------------
972
+ 2021-12-31 10:53:06,649 EPOCH 41 done: loss 0.0537 - lr 0.0125000
973
+ 2021-12-31 10:53:42,920 DEV : loss 0.06420625746250153 - f1-score (micro avg) 0.9831
974
+ 2021-12-31 10:53:43,107 BAD EPOCHS (no improvement): 3
975
+ 2021-12-31 10:53:43,108 ----------------------------------------------------------------------------------------------------
976
+ 2021-12-31 10:53:59,320 epoch 42 - iter 180/1807 - loss 0.04886676 - samples/sec: 88.96 - lr: 0.012500
977
+ 2021-12-31 10:54:15,301 epoch 42 - iter 360/1807 - loss 0.05210812 - samples/sec: 90.24 - lr: 0.012500
978
+ 2021-12-31 10:54:31,014 epoch 42 - iter 540/1807 - loss 0.05220145 - samples/sec: 91.78 - lr: 0.012500
979
+ 2021-12-31 10:54:46,930 epoch 42 - iter 720/1807 - loss 0.05239133 - samples/sec: 90.61 - lr: 0.012500
980
+ 2021-12-31 10:55:02,977 epoch 42 - iter 900/1807 - loss 0.05260141 - samples/sec: 89.87 - lr: 0.012500
981
+ 2021-12-31 10:55:19,228 epoch 42 - iter 1080/1807 - loss 0.05260187 - samples/sec: 88.74 - lr: 0.012500
982
+ 2021-12-31 10:55:35,215 epoch 42 - iter 1260/1807 - loss 0.05242910 - samples/sec: 90.21 - lr: 0.012500
983
+ 2021-12-31 10:55:51,163 epoch 42 - iter 1440/1807 - loss 0.05265492 - samples/sec: 90.43 - lr: 0.012500
984
+ 2021-12-31 10:56:07,328 epoch 42 - iter 1620/1807 - loss 0.05317972 - samples/sec: 89.21 - lr: 0.012500
985
+ 2021-12-31 10:56:23,405 epoch 42 - iter 1800/1807 - loss 0.05319734 - samples/sec: 89.70 - lr: 0.012500
986
+ 2021-12-31 10:56:23,951 ----------------------------------------------------------------------------------------------------
987
+ 2021-12-31 10:56:23,951 EPOCH 42 done: loss 0.0532 - lr 0.0125000
988
+ 2021-12-31 10:57:03,168 DEV : loss 0.06362675130367279 - f1-score (micro avg) 0.9831
989
+ 2021-12-31 10:57:03,368 BAD EPOCHS (no improvement): 4
990
+ 2021-12-31 10:57:03,370 ----------------------------------------------------------------------------------------------------
991
+ 2021-12-31 10:57:19,009 epoch 43 - iter 180/1807 - loss 0.05496817 - samples/sec: 92.23 - lr: 0.006250
992
+ 2021-12-31 10:57:34,952 epoch 43 - iter 360/1807 - loss 0.05262157 - samples/sec: 90.45 - lr: 0.006250
993
+ 2021-12-31 10:57:51,104 epoch 43 - iter 540/1807 - loss 0.05252708 - samples/sec: 89.28 - lr: 0.006250
994
+ 2021-12-31 10:58:06,630 epoch 43 - iter 720/1807 - loss 0.05258453 - samples/sec: 92.89 - lr: 0.006250
995
+ 2021-12-31 10:58:22,297 epoch 43 - iter 900/1807 - loss 0.05170441 - samples/sec: 92.05 - lr: 0.006250
996
+ 2021-12-31 10:58:38,636 epoch 43 - iter 1080/1807 - loss 0.05199907 - samples/sec: 88.26 - lr: 0.006250
997
+ 2021-12-31 10:58:54,582 epoch 43 - iter 1260/1807 - loss 0.05289598 - samples/sec: 90.42 - lr: 0.006250
998
+ 2021-12-31 10:59:10,756 epoch 43 - iter 1440/1807 - loss 0.05239565 - samples/sec: 89.17 - lr: 0.006250
999
+ 2021-12-31 10:59:26,756 epoch 43 - iter 1620/1807 - loss 0.05245197 - samples/sec: 90.14 - lr: 0.006250
1000
+ 2021-12-31 10:59:43,140 epoch 43 - iter 1800/1807 - loss 0.05236153 - samples/sec: 88.01 - lr: 0.006250
1001
+ 2021-12-31 10:59:43,734 ----------------------------------------------------------------------------------------------------
1002
+ 2021-12-31 10:59:43,734 EPOCH 43 done: loss 0.0523 - lr 0.0062500
1003
+ 2021-12-31 11:00:19,875 DEV : loss 0.06449297815561295 - f1-score (micro avg) 0.983
1004
+ 2021-12-31 11:00:20,058 BAD EPOCHS (no improvement): 1
1005
+ 2021-12-31 11:00:20,060 ----------------------------------------------------------------------------------------------------
1006
+ 2021-12-31 11:00:36,054 epoch 44 - iter 180/1807 - loss 0.05668095 - samples/sec: 90.17 - lr: 0.006250
1007
+ 2021-12-31 11:00:51,879 epoch 44 - iter 360/1807 - loss 0.05376107 - samples/sec: 91.13 - lr: 0.006250
1008
+ 2021-12-31 11:01:07,774 epoch 44 - iter 540/1807 - loss 0.05410164 - samples/sec: 90.73 - lr: 0.006250
1009
+ 2021-12-31 11:01:23,539 epoch 44 - iter 720/1807 - loss 0.05349578 - samples/sec: 91.47 - lr: 0.006250
1010
+ 2021-12-31 11:01:39,511 epoch 44 - iter 900/1807 - loss 0.05316904 - samples/sec: 90.29 - lr: 0.006250
1011
+ 2021-12-31 11:01:55,495 epoch 44 - iter 1080/1807 - loss 0.05360298 - samples/sec: 90.23 - lr: 0.006250
1012
+ 2021-12-31 11:02:11,974 epoch 44 - iter 1260/1807 - loss 0.05360002 - samples/sec: 87.52 - lr: 0.006250
1013
+ 2021-12-31 11:02:27,697 epoch 44 - iter 1440/1807 - loss 0.05333331 - samples/sec: 91.72 - lr: 0.006250
1014
+ 2021-12-31 11:02:43,120 epoch 44 - iter 1620/1807 - loss 0.05286587 - samples/sec: 93.50 - lr: 0.006250
1015
+ 2021-12-31 11:02:58,798 epoch 44 - iter 1800/1807 - loss 0.05270956 - samples/sec: 91.99 - lr: 0.006250
1016
+ 2021-12-31 11:02:59,351 ----------------------------------------------------------------------------------------------------
1017
+ 2021-12-31 11:02:59,352 EPOCH 44 done: loss 0.0527 - lr 0.0062500
1018
+ 2021-12-31 11:03:35,832 DEV : loss 0.06455685943365097 - f1-score (micro avg) 0.9831
1019
+ 2021-12-31 11:03:36,019 BAD EPOCHS (no improvement): 2
1020
+ 2021-12-31 11:03:36,021 ----------------------------------------------------------------------------------------------------
1021
+ 2021-12-31 11:03:52,202 epoch 45 - iter 180/1807 - loss 0.05063292 - samples/sec: 89.13 - lr: 0.006250
1022
+ 2021-12-31 11:04:08,225 epoch 45 - iter 360/1807 - loss 0.05171673 - samples/sec: 90.00 - lr: 0.006250
1023
+ 2021-12-31 11:04:24,263 epoch 45 - iter 540/1807 - loss 0.05167432 - samples/sec: 89.93 - lr: 0.006250
1024
+ 2021-12-31 11:04:40,362 epoch 45 - iter 720/1807 - loss 0.05121190 - samples/sec: 89.58 - lr: 0.006250
1025
+ 2021-12-31 11:04:56,274 epoch 45 - iter 900/1807 - loss 0.05221446 - samples/sec: 90.63 - lr: 0.006250
1026
+ 2021-12-31 11:05:12,479 epoch 45 - iter 1080/1807 - loss 0.05188940 - samples/sec: 88.99 - lr: 0.006250
1027
+ 2021-12-31 11:05:28,572 epoch 45 - iter 1260/1807 - loss 0.05237022 - samples/sec: 89.62 - lr: 0.006250
1028
+ 2021-12-31 11:05:44,476 epoch 45 - iter 1440/1807 - loss 0.05180768 - samples/sec: 90.68 - lr: 0.006250
1029
+ 2021-12-31 11:06:00,356 epoch 45 - iter 1620/1807 - loss 0.05176296 - samples/sec: 90.81 - lr: 0.006250
1030
+ 2021-12-31 11:06:16,343 epoch 45 - iter 1800/1807 - loss 0.05236414 - samples/sec: 90.20 - lr: 0.006250
1031
+ 2021-12-31 11:06:16,948 ----------------------------------------------------------------------------------------------------
1032
+ 2021-12-31 11:06:16,949 EPOCH 45 done: loss 0.0523 - lr 0.0062500
1033
+ 2021-12-31 11:06:56,269 DEV : loss 0.06413871794939041 - f1-score (micro avg) 0.983
1034
+ 2021-12-31 11:06:56,425 BAD EPOCHS (no improvement): 3
1035
+ 2021-12-31 11:06:56,427 ----------------------------------------------------------------------------------------------------
1036
+ 2021-12-31 11:07:12,359 epoch 46 - iter 180/1807 - loss 0.04909660 - samples/sec: 90.52 - lr: 0.006250
1037
+ 2021-12-31 11:07:27,933 epoch 46 - iter 360/1807 - loss 0.04990439 - samples/sec: 92.58 - lr: 0.006250
1038
+ 2021-12-31 11:07:44,036 epoch 46 - iter 540/1807 - loss 0.05183261 - samples/sec: 89.55 - lr: 0.006250
1039
+ 2021-12-31 11:07:59,808 epoch 46 - iter 720/1807 - loss 0.05108367 - samples/sec: 91.44 - lr: 0.006250
1040
+ 2021-12-31 11:08:16,323 epoch 46 - iter 900/1807 - loss 0.05156129 - samples/sec: 87.33 - lr: 0.006250
1041
+ 2021-12-31 11:08:32,181 epoch 46 - iter 1080/1807 - loss 0.05164911 - samples/sec: 90.93 - lr: 0.006250
1042
+ 2021-12-31 11:08:48,124 epoch 46 - iter 1260/1807 - loss 0.05241189 - samples/sec: 90.45 - lr: 0.006250
1043
+ 2021-12-31 11:09:04,600 epoch 46 - iter 1440/1807 - loss 0.05209220 - samples/sec: 87.53 - lr: 0.006250
1044
+ 2021-12-31 11:09:20,227 epoch 46 - iter 1620/1807 - loss 0.05187081 - samples/sec: 92.29 - lr: 0.006250
1045
+ 2021-12-31 11:09:36,191 epoch 46 - iter 1800/1807 - loss 0.05205935 - samples/sec: 90.34 - lr: 0.006250
1046
+ 2021-12-31 11:09:36,782 ----------------------------------------------------------------------------------------------------
1047
+ 2021-12-31 11:09:36,782 EPOCH 46 done: loss 0.0521 - lr 0.0062500
1048
+ 2021-12-31 11:10:13,201 DEV : loss 0.0644669309258461 - f1-score (micro avg) 0.983
1049
+ 2021-12-31 11:10:13,398 BAD EPOCHS (no improvement): 4
1050
+ 2021-12-31 11:10:13,399 ----------------------------------------------------------------------------------------------------
1051
+ 2021-12-31 11:10:29,417 epoch 47 - iter 180/1807 - loss 0.05250873 - samples/sec: 90.04 - lr: 0.003125
1052
+ 2021-12-31 11:10:45,589 epoch 47 - iter 360/1807 - loss 0.05160928 - samples/sec: 89.18 - lr: 0.003125
1053
+ 2021-12-31 11:11:01,280 epoch 47 - iter 540/1807 - loss 0.05161492 - samples/sec: 91.91 - lr: 0.003125
1054
+ 2021-12-31 11:11:17,277 epoch 47 - iter 720/1807 - loss 0.05136337 - samples/sec: 90.15 - lr: 0.003125
1055
+ 2021-12-31 11:11:33,230 epoch 47 - iter 900/1807 - loss 0.05023989 - samples/sec: 90.40 - lr: 0.003125
1056
+ 2021-12-31 11:11:49,156 epoch 47 - iter 1080/1807 - loss 0.05064277 - samples/sec: 90.55 - lr: 0.003125
1057
+ 2021-12-31 11:12:04,959 epoch 47 - iter 1260/1807 - loss 0.05089925 - samples/sec: 91.25 - lr: 0.003125
1058
+ 2021-12-31 11:12:21,092 epoch 47 - iter 1440/1807 - loss 0.05071923 - samples/sec: 89.39 - lr: 0.003125
1059
+ 2021-12-31 11:12:36,949 epoch 47 - iter 1620/1807 - loss 0.05083516 - samples/sec: 90.95 - lr: 0.003125
1060
+ 2021-12-31 11:12:52,744 epoch 47 - iter 1800/1807 - loss 0.05106443 - samples/sec: 91.31 - lr: 0.003125
1061
+ 2021-12-31 11:12:53,321 ----------------------------------------------------------------------------------------------------
1062
+ 2021-12-31 11:12:53,321 EPOCH 47 done: loss 0.0511 - lr 0.0031250
1063
+ 2021-12-31 11:13:29,490 DEV : loss 0.06470787525177002 - f1-score (micro avg) 0.9829
1064
+ 2021-12-31 11:13:29,672 BAD EPOCHS (no improvement): 1
1065
+ 2021-12-31 11:13:29,674 ----------------------------------------------------------------------------------------------------
1066
+ 2021-12-31 11:13:45,987 epoch 48 - iter 180/1807 - loss 0.05119727 - samples/sec: 88.41 - lr: 0.003125
1067
+ 2021-12-31 11:14:02,271 epoch 48 - iter 360/1807 - loss 0.05026057 - samples/sec: 88.57 - lr: 0.003125
1068
+ 2021-12-31 11:14:18,202 epoch 48 - iter 540/1807 - loss 0.04968790 - samples/sec: 90.53 - lr: 0.003125
1069
+ 2021-12-31 11:14:33,834 epoch 48 - iter 720/1807 - loss 0.05040465 - samples/sec: 92.25 - lr: 0.003125
1070
+ 2021-12-31 11:14:49,709 epoch 48 - iter 900/1807 - loss 0.05065504 - samples/sec: 90.84 - lr: 0.003125
1071
+ 2021-12-31 11:15:05,727 epoch 48 - iter 1080/1807 - loss 0.05037297 - samples/sec: 90.02 - lr: 0.003125
1072
+ 2021-12-31 11:15:21,077 epoch 48 - iter 1260/1807 - loss 0.05063199 - samples/sec: 93.96 - lr: 0.003125
1073
+ 2021-12-31 11:15:36,587 epoch 48 - iter 1440/1807 - loss 0.05076731 - samples/sec: 92.98 - lr: 0.003125
1074
+ 2021-12-31 11:15:52,489 epoch 48 - iter 1620/1807 - loss 0.05082260 - samples/sec: 90.68 - lr: 0.003125
1075
+ 2021-12-31 11:16:08,520 epoch 48 - iter 1800/1807 - loss 0.05101165 - samples/sec: 89.96 - lr: 0.003125
1076
+ 2021-12-31 11:16:09,115 ----------------------------------------------------------------------------------------------------
1077
+ 2021-12-31 11:16:09,116 EPOCH 48 done: loss 0.0510 - lr 0.0031250
1078
+ 2021-12-31 11:16:48,035 DEV : loss 0.06484530121088028 - f1-score (micro avg) 0.983
1079
+ 2021-12-31 11:16:48,189 BAD EPOCHS (no improvement): 2
1080
+ 2021-12-31 11:16:48,191 ----------------------------------------------------------------------------------------------------
1081
+ 2021-12-31 11:17:03,775 epoch 49 - iter 180/1807 - loss 0.04706234 - samples/sec: 92.51 - lr: 0.003125
1082
+ 2021-12-31 11:17:19,604 epoch 49 - iter 360/1807 - loss 0.04796051 - samples/sec: 91.07 - lr: 0.003125
1083
+ 2021-12-31 11:17:35,506 epoch 49 - iter 540/1807 - loss 0.04820802 - samples/sec: 90.67 - lr: 0.003125
1084
+ 2021-12-31 11:17:51,301 epoch 49 - iter 720/1807 - loss 0.04872061 - samples/sec: 91.31 - lr: 0.003125
1085
+ 2021-12-31 11:18:06,963 epoch 49 - iter 900/1807 - loss 0.04900955 - samples/sec: 92.08 - lr: 0.003125
1086
+ 2021-12-31 11:18:22,961 epoch 49 - iter 1080/1807 - loss 0.04952427 - samples/sec: 90.14 - lr: 0.003125
1087
+ 2021-12-31 11:18:39,172 epoch 49 - iter 1260/1807 - loss 0.04981242 - samples/sec: 88.96 - lr: 0.003125
1088
+ 2021-12-31 11:18:55,485 epoch 49 - iter 1440/1807 - loss 0.05015633 - samples/sec: 88.41 - lr: 0.003125
1089
+ 2021-12-31 11:19:11,166 epoch 49 - iter 1620/1807 - loss 0.05076498 - samples/sec: 91.97 - lr: 0.003125
1090
+ 2021-12-31 11:19:27,065 epoch 49 - iter 1800/1807 - loss 0.05104387 - samples/sec: 90.71 - lr: 0.003125
1091
+ 2021-12-31 11:19:27,675 ----------------------------------------------------------------------------------------------------
1092
+ 2021-12-31 11:19:27,675 EPOCH 49 done: loss 0.0510 - lr 0.0031250
1093
+ 2021-12-31 11:20:04,021 DEV : loss 0.06486314535140991 - f1-score (micro avg) 0.983
1094
+ 2021-12-31 11:20:04,217 BAD EPOCHS (no improvement): 3
1095
+ 2021-12-31 11:20:04,218 ----------------------------------------------------------------------------------------------------
1096
+ 2021-12-31 11:20:20,650 epoch 50 - iter 180/1807 - loss 0.05726933 - samples/sec: 87.77 - lr: 0.003125
1097
+ 2021-12-31 11:20:36,455 epoch 50 - iter 360/1807 - loss 0.05538766 - samples/sec: 91.25 - lr: 0.003125
1098
+ 2021-12-31 11:20:52,012 epoch 50 - iter 540/1807 - loss 0.05444601 - samples/sec: 92.69 - lr: 0.003125
1099
+ 2021-12-31 11:21:07,973 epoch 50 - iter 720/1807 - loss 0.05313637 - samples/sec: 90.35 - lr: 0.003125
1100
+ 2021-12-31 11:21:23,983 epoch 50 - iter 900/1807 - loss 0.05290526 - samples/sec: 90.08 - lr: 0.003125
1101
+ 2021-12-31 11:21:39,924 epoch 50 - iter 1080/1807 - loss 0.05235234 - samples/sec: 90.47 - lr: 0.003125
1102
+ 2021-12-31 11:21:55,732 epoch 50 - iter 1260/1807 - loss 0.05207690 - samples/sec: 91.23 - lr: 0.003125
1103
+ 2021-12-31 11:22:11,663 epoch 50 - iter 1440/1807 - loss 0.05205514 - samples/sec: 90.52 - lr: 0.003125
1104
+ 2021-12-31 11:22:27,392 epoch 50 - iter 1620/1807 - loss 0.05173851 - samples/sec: 91.69 - lr: 0.003125
1105
+ 2021-12-31 11:22:43,193 epoch 50 - iter 1800/1807 - loss 0.05189058 - samples/sec: 91.27 - lr: 0.003125
1106
+ 2021-12-31 11:22:43,750 ----------------------------------------------------------------------------------------------------
1107
+ 2021-12-31 11:22:43,750 EPOCH 50 done: loss 0.0519 - lr 0.0031250
1108
+ 2021-12-31 11:23:20,432 DEV : loss 0.06452730298042297 - f1-score (micro avg) 0.9831
1109
+ 2021-12-31 11:23:20,619 BAD EPOCHS (no improvement): 4
1110
+ 2021-12-31 11:23:25,890 ----------------------------------------------------------------------------------------------------
1111
+ 2021-12-31 11:23:25,893 loading file models/UPOS_UD_FRENCH_GSD_PLUS_Flair-Embeddings_50_2021-12-31-08:34:44/best-model.pt
1112
+ 2021-12-31 11:23:43,354 0.9797 0.9797 0.9797 0.9797
1113
+ 2021-12-31 11:23:43,354
1114
+ Results:
1115
+ - F-score (micro) 0.9797
1116
+ - F-score (macro) 0.9178
1117
+ - Accuracy 0.9797
1118
+
1119
+ By class:
1120
+ precision recall f1-score support
1121
+
1122
+ PREP 0.9966 0.9987 0.9976 1483
1123
+ PUNCT 1.0000 1.0000 1.0000 833
1124
+ NMS 0.9634 0.9801 0.9717 753
1125
+ DET 0.9923 0.9984 0.9954 645
1126
+ VERB 0.9913 0.9811 0.9862 583
1127
+ NFS 0.9667 0.9839 0.9752 560
1128
+ ADV 0.9940 0.9821 0.9880 504
1129
+ PROPN 0.9541 0.8937 0.9229 395
1130
+ DETMS 1.0000 1.0000 1.0000 362
1131
+ AUX 0.9860 0.9915 0.9888 355
1132
+ YPFOR 1.0000 1.0000 1.0000 353
1133
+ NMP 0.9666 0.9475 0.9570 305
1134
+ COCO 0.9959 1.0000 0.9980 245
1135
+ ADJMS 0.9463 0.9385 0.9424 244
1136
+ DETFS 1.0000 1.0000 1.0000 240
1137
+ CHIF 0.9648 0.9865 0.9755 222
1138
+ NFP 0.9515 0.9849 0.9679 199
1139
+ ADJFS 0.9657 0.9286 0.9468 182
1140
+ VPPMS 0.9387 0.9745 0.9563 157
1141
+ COSUB 1.0000 0.9844 0.9921 128
1142
+ DINTMS 0.9918 0.9918 0.9918 122
1143
+ XFAMIL 0.9298 0.9217 0.9258 115
1144
+ PPER3MS 1.0000 1.0000 1.0000 87
1145
+ ADJMP 0.9294 0.9634 0.9461 82
1146
+ PDEMMS 1.0000 1.0000 1.0000 75
1147
+ ADJFP 0.9861 0.9342 0.9595 76
1148
+ PREL 0.9859 1.0000 0.9929 70
1149
+ DINTFS 0.9839 1.0000 0.9919 61
1150
+ PREF 1.0000 1.0000 1.0000 52
1151
+ PPOBJMS 0.9565 0.9362 0.9462 47
1152
+ PREFP 0.9778 1.0000 0.9888 44
1153
+ PINDMS 1.0000 0.9773 0.9885 44
1154
+ VPPFS 0.8298 0.9750 0.8966 40
1155
+ PPER1S 1.0000 1.0000 1.0000 42
1156
+ SYM 1.0000 0.9474 0.9730 38
1157
+ NOUN 0.8824 0.7692 0.8219 39
1158
+ PRON 1.0000 0.9677 0.9836 31
1159
+ PDEMFS 1.0000 1.0000 1.0000 29
1160
+ VPPMP 0.9286 1.0000 0.9630 26
1161
+ ADJ 0.9524 0.9091 0.9302 22
1162
+ PPER3MP 1.0000 1.0000 1.0000 20
1163
+ VPPFP 1.0000 1.0000 1.0000 19
1164
+ PPER3FS 1.0000 1.0000 1.0000 18
1165
+ MOTINC 0.3333 0.4000 0.3636 15
1166
+ PREFS 1.0000 1.0000 1.0000 10
1167
+ PPOBJMP 1.0000 0.8000 0.8889 10
1168
+ PPOBJFS 0.6250 0.8333 0.7143 6
1169
+ INTJ 0.5000 0.6667 0.5714 6
1170
+ PART 1.0000 1.0000 1.0000 4
1171
+ PDEMMP 1.0000 1.0000 1.0000 3
1172
+ PDEMFP 1.0000 1.0000 1.0000 3
1173
+ PPER3FP 1.0000 1.0000 1.0000 2
1174
+ NUM 1.0000 0.3333 0.5000 3
1175
+ PPER2S 1.0000 1.0000 1.0000 2
1176
+ PPOBJFP 0.5000 0.5000 0.5000 2
1177
+ PRELMS 1.0000 1.0000 1.0000 2
1178
+ PINDFS 0.5000 1.0000 0.6667 1
1179
+ PINDMP 1.0000 1.0000 1.0000 1
1180
+ X 0.0000 0.0000 0.0000 1
1181
+ PINDFP 1.0000 1.0000 1.0000 1
1182
+
1183
+ micro avg 0.9797 0.9797 0.9797 10019
1184
+ macro avg 0.9228 0.9230 0.9178 10019
1185
+ weighted avg 0.9802 0.9797 0.9798 10019
1186
+ samples avg 0.9797 0.9797 0.9797 10019
1187
+
1188
+ 2021-12-31 11:23:43,354 ----------------------------------------------------------------------------------------------------
weights.txt ADDED
File without changes