|
2022-10-01 00:23:25,105 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:23:25,107 Model: "SequenceTagger( |
|
(embeddings): StackedEmbeddings( |
|
(list_embedding_0): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(119547, 768, padding_idx=0) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(list_embedding_1): FlairEmbeddings( |
|
(lm): LanguageModel( |
|
(drop): Dropout(p=0.5, inplace=False) |
|
(encoder): Embedding(275, 100) |
|
(rnn): LSTM(100, 1024) |
|
(decoder): Linear(in_features=1024, out_features=275, bias=True) |
|
) |
|
) |
|
(list_embedding_2): FlairEmbeddings( |
|
(lm): LanguageModel( |
|
(drop): Dropout(p=0.5, inplace=False) |
|
(encoder): Embedding(275, 100) |
|
(rnn): LSTM(100, 1024) |
|
(decoder): Linear(in_features=1024, out_features=275, bias=True) |
|
) |
|
) |
|
) |
|
(word_dropout): WordDropout(p=0.05) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(embedding2nn): Linear(in_features=2816, out_features=2816, bias=True) |
|
(linear): Linear(in_features=2816, out_features=13, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2022-10-01 00:23:25,114 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:23:25,115 Corpus: "Corpus: 70000 train + 15000 dev + 15000 test sentences" |
|
2022-10-01 00:23:25,115 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:23:25,115 Parameters: |
|
2022-10-01 00:23:25,116 - learning_rate: "0.010000" |
|
2022-10-01 00:23:25,116 - mini_batch_size: "8" |
|
2022-10-01 00:23:25,116 - patience: "3" |
|
2022-10-01 00:23:25,116 - anneal_factor: "0.5" |
|
2022-10-01 00:23:25,116 - max_epochs: "2" |
|
2022-10-01 00:23:25,116 - shuffle: "True" |
|
2022-10-01 00:23:25,117 - train_with_dev: "False" |
|
2022-10-01 00:23:25,117 - batch_growth_annealing: "False" |
|
2022-10-01 00:23:25,117 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:23:25,117 Model training base path: "c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\mix_trans_word" |
|
2022-10-01 00:23:25,117 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:23:25,118 Device: cuda:0 |
|
2022-10-01 00:23:25,118 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:23:25,118 Embeddings storage mode: cpu |
|
2022-10-01 00:23:25,119 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:25:10,652 epoch 1 - iter 875/8750 - loss 0.52734710 - samples/sec: 66.36 - lr: 0.010000 |
|
2022-10-01 00:26:56,050 epoch 1 - iter 1750/8750 - loss 0.40571165 - samples/sec: 66.45 - lr: 0.010000 |
|
2022-10-01 00:28:42,758 epoch 1 - iter 2625/8750 - loss 0.33981350 - samples/sec: 65.63 - lr: 0.010000 |
|
2022-10-01 00:30:27,826 epoch 1 - iter 3500/8750 - loss 0.29553411 - samples/sec: 66.66 - lr: 0.010000 |
|
2022-10-01 00:32:13,605 epoch 1 - iter 4375/8750 - loss 0.26472648 - samples/sec: 66.21 - lr: 0.010000 |
|
2022-10-01 00:33:58,962 epoch 1 - iter 5250/8750 - loss 0.24119392 - samples/sec: 66.47 - lr: 0.010000 |
|
2022-10-01 00:35:44,264 epoch 1 - iter 6125/8750 - loss 0.22350560 - samples/sec: 66.50 - lr: 0.010000 |
|
2022-10-01 00:37:29,676 epoch 1 - iter 7000/8750 - loss 0.20938707 - samples/sec: 66.43 - lr: 0.010000 |
|
2022-10-01 00:39:17,828 epoch 1 - iter 7875/8750 - loss 0.19801233 - samples/sec: 64.75 - lr: 0.010000 |
|
2022-10-01 00:41:05,621 epoch 1 - iter 8750/8750 - loss 0.18900810 - samples/sec: 64.98 - lr: 0.010000 |
|
2022-10-01 00:41:05,624 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:41:05,624 EPOCH 1 done: loss 0.1890 - lr 0.010000 |
|
2022-10-01 00:43:16,083 Evaluating as a multi-label problem: False |
|
2022-10-01 00:43:16,227 DEV : loss 0.06317088007926941 - f1-score (micro avg) 0.9585 |
|
2022-10-01 00:43:17,308 BAD EPOCHS (no improvement): 0 |
|
2022-10-01 00:43:17,309 saving best model |
|
2022-10-01 00:43:18,885 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:45:00,373 epoch 2 - iter 875/8750 - loss 0.09938527 - samples/sec: 69.02 - lr: 0.010000 |
|
2022-10-01 00:46:39,918 epoch 2 - iter 1750/8750 - loss 0.09782604 - samples/sec: 70.36 - lr: 0.010000 |
|
2022-10-01 00:48:19,288 epoch 2 - iter 2625/8750 - loss 0.09732946 - samples/sec: 70.50 - lr: 0.010000 |
|
2022-10-01 00:49:56,913 epoch 2 - iter 3500/8750 - loss 0.09652202 - samples/sec: 71.76 - lr: 0.010000 |
|
2022-10-01 00:51:35,781 epoch 2 - iter 4375/8750 - loss 0.09592801 - samples/sec: 70.86 - lr: 0.010000 |
|
2022-10-01 00:53:12,838 epoch 2 - iter 5250/8750 - loss 0.09478132 - samples/sec: 72.17 - lr: 0.010000 |
|
2022-10-01 00:54:49,247 epoch 2 - iter 6125/8750 - loss 0.09405506 - samples/sec: 72.65 - lr: 0.010000 |
|
2022-10-01 00:56:26,656 epoch 2 - iter 7000/8750 - loss 0.09270363 - samples/sec: 71.90 - lr: 0.010000 |
|
2022-10-01 00:58:04,050 epoch 2 - iter 7875/8750 - loss 0.09222568 - samples/sec: 71.92 - lr: 0.010000 |
|
2022-10-01 00:59:41,351 epoch 2 - iter 8750/8750 - loss 0.09155321 - samples/sec: 71.98 - lr: 0.010000 |
|
2022-10-01 00:59:41,359 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 00:59:41,360 EPOCH 2 done: loss 0.0916 - lr 0.010000 |
|
2022-10-01 01:01:38,941 Evaluating as a multi-label problem: False |
|
2022-10-01 01:01:39,054 DEV : loss 0.04371843859553337 - f1-score (micro avg) 0.9749 |
|
2022-10-01 01:01:40,056 BAD EPOCHS (no improvement): 0 |
|
2022-10-01 01:01:40,058 saving best model |
|
2022-10-01 01:01:42,979 ---------------------------------------------------------------------------------------------------- |
|
2022-10-01 01:01:42,986 loading file c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\mix_trans_word\best-model.pt |
|
2022-10-01 01:01:46,879 SequenceTagger predicts: Dictionary with 13 tags: O, S-brand, B-brand, E-brand, I-brand, S-size, B-size, E-size, I-size, S-color, B-color, E-color, I-color |
|
2022-10-01 01:03:40,258 Evaluating as a multi-label problem: False |
|
2022-10-01 01:03:40,388 0.9719 0.9777 0.9748 0.951 |
|
2022-10-01 01:03:40,389 |
|
Results: |
|
- F-score (micro) 0.9748 |
|
- F-score (macro) 0.9624 |
|
- Accuracy 0.951 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
brand 0.9779 0.9849 0.9814 11779 |
|
size 0.9780 0.9821 0.9800 3125 |
|
color 0.9249 0.9264 0.9256 1915 |
|
|
|
micro avg 0.9719 0.9777 0.9748 16819 |
|
macro avg 0.9603 0.9644 0.9624 16819 |
|
weighted avg 0.9719 0.9777 0.9748 16819 |
|
|
|
2022-10-01 01:03:40,391 ---------------------------------------------------------------------------------------------------- |
|
|