codymd committed on
Commit
b5b7092
1 Parent(s): 171d7ba

End of training

Browse files
README.md CHANGED
@@ -18,14 +18,14 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.1112
22
- - Accuracy: 0.9887
23
- - F1 Macro: 0.7886
24
- - F1 Micro: 0.9888
25
- - Precision Macro: 0.8418
26
- - Precision Micro: 0.9887
27
- - Recall Macro: 0.7568
28
- - Recall Micro: 0.9889
29
 
30
  ## Model description
31
 
@@ -50,18 +50,30 @@ The following hyperparameters were used during training:
50
  - seed: 42
51
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
52
  - lr_scheduler_type: linear
53
- - num_epochs: 6
54
 
55
  ### Training results
56
 
57
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro | F1 Micro | Precision Macro | Precision Micro | Recall Macro | Recall Micro |
58
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|:--------:|:---------------:|:---------------:|:------------:|:------------:|
59
- | 0.0388 | 1.0 | 93 | 0.1275 | 0.9813 | 0.6122 | 0.9816 | 0.7265 | 0.9816 | 0.5596 | 0.9816 |
60
- | 0.0227 | 2.0 | 186 | 0.1113 | 0.9853 | 0.6907 | 0.9854 | 0.7169 | 0.9853 | 0.6795 | 0.9855 |
61
- | 0.0085 | 3.0 | 279 | 0.1155 | 0.9868 | 0.7550 | 0.9868 | 0.8260 | 0.9868 | 0.7155 | 0.9869 |
62
- | 0.0045 | 4.0 | 372 | 0.1146 | 0.9866 | 0.7407 | 0.9867 | 0.7604 | 0.9866 | 0.7299 | 0.9868 |
63
- | 0.0027 | 5.0 | 465 | 0.1084 | 0.9890 | 0.7935 | 0.9891 | 0.8525 | 0.9890 | 0.7570 | 0.9892 |
64
- | 0.0008 | 6.0 | 558 | 0.1112 | 0.9887 | 0.7886 | 0.9888 | 0.8418 | 0.9887 | 0.7568 | 0.9889 |
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.1052
22
+ - Accuracy: 0.9895
23
+ - F1 Macro: 0.7899
24
+ - F1 Micro: 0.9212
25
+ - Precision Macro: 0.8429
26
+ - Precision Micro: 0.9694
27
+ - Recall Macro: 0.7572
28
+ - Recall Micro: 0.8776
29
 
30
  ## Model description
31
 
 
50
  - seed: 42
51
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
52
  - lr_scheduler_type: linear
53
+ - num_epochs: 18
54
 
55
  ### Training results
56
 
57
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro | F1 Micro | Precision Macro | Precision Micro | Recall Macro | Recall Micro |
58
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|:--------:|:---------------:|:---------------:|:------------:|:------------:|
59
+ | 0.3673 | 1.0 | 93 | 0.2453 | 0.9284 | 0.1919 | 0.4850 | 0.2701 | 0.4678 | 0.1872 | 0.5035 |
60
+ | 0.2176 | 2.0 | 186 | 0.1888 | 0.9439 | 0.2591 | 0.5149 | 0.3936 | 0.6230 | 0.2298 | 0.4388 |
61
+ | 0.1418 | 3.0 | 279 | 0.1454 | 0.9666 | 0.3554 | 0.725 | 0.4120 | 0.7902 | 0.3577 | 0.6697 |
62
+ | 0.0859 | 4.0 | 372 | 0.1238 | 0.9750 | 0.4365 | 0.7789 | 0.6084 | 0.8540 | 0.3946 | 0.7159 |
63
+ | 0.0607 | 5.0 | 465 | 0.1136 | 0.9766 | 0.4979 | 0.7965 | 0.5945 | 0.8606 | 0.4781 | 0.7413 |
64
+ | 0.0413 | 6.0 | 558 | 0.1103 | 0.9827 | 0.4995 | 0.8608 | 0.6097 | 0.9629 | 0.4415 | 0.7783 |
65
+ | 0.0309 | 7.0 | 651 | 0.1109 | 0.9821 | 0.5654 | 0.8558 | 0.6379 | 0.8842 | 0.5439 | 0.8291 |
66
+ | 0.0237 | 8.0 | 744 | 0.1056 | 0.9847 | 0.6330 | 0.8721 | 0.7169 | 0.9227 | 0.5923 | 0.8268 |
67
+ | 0.0154 | 9.0 | 837 | 0.1009 | 0.9858 | 0.6639 | 0.8816 | 0.7079 | 0.9352 | 0.6422 | 0.8337 |
68
+ | 0.0096 | 10.0 | 930 | 0.1003 | 0.9881 | 0.6783 | 0.9047 | 0.7250 | 0.9470 | 0.6494 | 0.8661 |
69
+ | 0.0078 | 11.0 | 1023 | 0.1000 | 0.9889 | 0.7661 | 0.9144 | 0.8075 | 0.9571 | 0.7524 | 0.8753 |
70
+ | 0.0052 | 12.0 | 1116 | 0.1046 | 0.9890 | 0.7563 | 0.9166 | 0.7940 | 0.9619 | 0.7561 | 0.8753 |
71
+ | 0.0041 | 13.0 | 1209 | 0.1022 | 0.9892 | 0.7804 | 0.9177 | 0.8255 | 0.9644 | 0.7570 | 0.8753 |
72
+ | 0.0021 | 14.0 | 1302 | 0.0994 | 0.9887 | 0.7602 | 0.9133 | 0.7959 | 0.9547 | 0.7534 | 0.8753 |
73
+ | 0.0018 | 15.0 | 1395 | 0.1043 | 0.9895 | 0.7903 | 0.9212 | 0.8431 | 0.9694 | 0.7572 | 0.8776 |
74
+ | 0.0016 | 16.0 | 1488 | 0.1059 | 0.9898 | 0.7901 | 0.9235 | 0.8434 | 0.9744 | 0.7572 | 0.8776 |
75
+ | 0.0014 | 17.0 | 1581 | 0.1063 | 0.9898 | 0.7924 | 0.9235 | 0.8472 | 0.9744 | 0.7572 | 0.8776 |
76
+ | 0.001 | 18.0 | 1674 | 0.1052 | 0.9895 | 0.7899 | 0.9212 | 0.8429 | 0.9694 | 0.7572 | 0.8776 |
77
 
78
 
79
  ### Framework versions
config.json CHANGED
@@ -27,93 +27,91 @@
27
  "14": "I-R:PUNCT",
28
  "15": "B-R:SPELL",
29
  "16": "B-U:PRON",
30
- "17": "I-R:SPELL",
31
- "18": "B-R:WO",
32
- "19": "I-R:MORPH",
33
- "20": "B-R:VERB:TENSE",
34
- "21": "B-R:NOUN",
35
- "22": "I-R:OTHER",
36
- "23": "B-U:DET",
37
- "24": "B-U:PART",
38
- "25": "B-R:ADV",
39
- "26": "I-R:VERB:SVA",
40
- "27": "I-R:DET",
41
- "28": "B-R:NOUN:NUM",
42
- "29": "O",
43
- "30": "B-R:ADJ",
44
- "31": "B-U:VERB:TENSE",
45
- "32": "B-R:PRON",
46
- "33": "I-U:VERB",
47
- "34": "I-R:ADV",
48
- "35": "I-R:VERB:FORM",
49
- "36": "B-R:OTHER",
50
- "37": "B-U:VERB:FORM",
51
- "38": "B-U:ADV",
52
- "39": "B-R:NOUN:INFL",
53
- "40": "B-R:PART",
54
- "41": "B-U:OTHER",
55
- "42": "I-R:VERB",
56
- "43": "B-R:PUNCT",
57
- "44": "B-R:VERB",
58
- "45": "B-R:DET",
59
- "46": "B-R:MORPH",
60
- "47": "I-R:NOUN",
61
- "48": "B-R:VERB:FORM",
62
- "49": "B-R:ADJ:FORM"
63
  },
64
  "initializer_range": 0.02,
65
  "intermediate_size": 3072,
66
  "label2id": {
67
- "B-R:ADJ": 30,
68
- "B-R:ADJ:FORM": 49,
69
- "B-R:ADV": 25,
70
- "B-R:DET": 45,
71
- "B-R:MORPH": 46,
72
- "B-R:NOUN": 21,
73
- "B-R:NOUN:INFL": 39,
74
- "B-R:NOUN:NUM": 28,
75
- "B-R:OTHER": 36,
76
- "B-R:PART": 40,
77
  "B-R:PREP": 11,
78
- "B-R:PRON": 32,
79
- "B-R:PUNCT": 43,
80
  "B-R:SPELL": 15,
81
- "B-R:VERB": 44,
82
- "B-R:VERB:FORM": 48,
83
  "B-R:VERB:INFL": 7,
84
  "B-R:VERB:SVA": 9,
85
- "B-R:VERB:TENSE": 20,
86
- "B-R:WO": 18,
87
- "B-U:ADV": 38,
88
  "B-U:CONJ": 13,
89
- "B-U:DET": 23,
90
  "B-U:NOUN": 10,
91
- "B-U:OTHER": 41,
92
- "B-U:PART": 24,
93
  "B-U:PREP": 0,
94
  "B-U:PRON": 16,
95
  "B-U:VERB": 6,
96
- "B-U:VERB:FORM": 37,
97
- "B-U:VERB:TENSE": 31,
98
- "I-R:ADV": 34,
99
- "I-R:DET": 27,
100
- "I-R:MORPH": 19,
101
- "I-R:NOUN": 47,
102
  "I-R:NOUN:INFL": 2,
103
- "I-R:OTHER": 22,
104
  "I-R:PUNCT": 14,
105
- "I-R:SPELL": 17,
106
- "I-R:VERB": 42,
107
- "I-R:VERB:FORM": 35,
108
  "I-R:VERB:INFL": 8,
109
- "I-R:VERB:SVA": 26,
110
  "I-R:VERB:TENSE": 3,
111
  "I-R:WO": 4,
112
  "I-U:NOUN": 12,
113
  "I-U:OTHER": 5,
114
- "I-U:VERB": 33,
115
  "I-U:VERB:TENSE": 1,
116
- "O": 29
117
  },
118
  "layer_norm_eps": 1e-12,
119
  "max_position_embeddings": 512,
 
27
  "14": "I-R:PUNCT",
28
  "15": "B-R:SPELL",
29
  "16": "B-U:PRON",
30
+ "17": "B-R:WO",
31
+ "18": "I-R:MORPH",
32
+ "19": "B-R:VERB:TENSE",
33
+ "20": "B-R:NOUN",
34
+ "21": "I-R:OTHER",
35
+ "22": "B-U:DET",
36
+ "23": "B-U:PART",
37
+ "24": "B-R:ADV",
38
+ "25": "I-R:VERB:SVA",
39
+ "26": "I-R:DET",
40
+ "27": "B-R:NOUN:NUM",
41
+ "28": "O",
42
+ "29": "B-R:ADJ",
43
+ "30": "B-U:VERB:TENSE",
44
+ "31": "B-R:PRON",
45
+ "32": "I-U:VERB",
46
+ "33": "I-R:ADV",
47
+ "34": "I-R:VERB:FORM",
48
+ "35": "B-R:OTHER",
49
+ "36": "B-U:VERB:FORM",
50
+ "37": "B-U:ADV",
51
+ "38": "B-R:NOUN:INFL",
52
+ "39": "B-R:PART",
53
+ "40": "B-U:OTHER",
54
+ "41": "I-R:VERB",
55
+ "42": "B-R:PUNCT",
56
+ "43": "B-R:VERB",
57
+ "44": "B-R:DET",
58
+ "45": "B-R:MORPH",
59
+ "46": "I-R:NOUN",
60
+ "47": "B-R:VERB:FORM",
61
+ "48": "B-R:ADJ:FORM"
 
62
  },
63
  "initializer_range": 0.02,
64
  "intermediate_size": 3072,
65
  "label2id": {
66
+ "B-R:ADJ": 29,
67
+ "B-R:ADJ:FORM": 48,
68
+ "B-R:ADV": 24,
69
+ "B-R:DET": 44,
70
+ "B-R:MORPH": 45,
71
+ "B-R:NOUN": 20,
72
+ "B-R:NOUN:INFL": 38,
73
+ "B-R:NOUN:NUM": 27,
74
+ "B-R:OTHER": 35,
75
+ "B-R:PART": 39,
76
  "B-R:PREP": 11,
77
+ "B-R:PRON": 31,
78
+ "B-R:PUNCT": 42,
79
  "B-R:SPELL": 15,
80
+ "B-R:VERB": 43,
81
+ "B-R:VERB:FORM": 47,
82
  "B-R:VERB:INFL": 7,
83
  "B-R:VERB:SVA": 9,
84
+ "B-R:VERB:TENSE": 19,
85
+ "B-R:WO": 17,
86
+ "B-U:ADV": 37,
87
  "B-U:CONJ": 13,
88
+ "B-U:DET": 22,
89
  "B-U:NOUN": 10,
90
+ "B-U:OTHER": 40,
91
+ "B-U:PART": 23,
92
  "B-U:PREP": 0,
93
  "B-U:PRON": 16,
94
  "B-U:VERB": 6,
95
+ "B-U:VERB:FORM": 36,
96
+ "B-U:VERB:TENSE": 30,
97
+ "I-R:ADV": 33,
98
+ "I-R:DET": 26,
99
+ "I-R:MORPH": 18,
100
+ "I-R:NOUN": 46,
101
  "I-R:NOUN:INFL": 2,
102
+ "I-R:OTHER": 21,
103
  "I-R:PUNCT": 14,
104
+ "I-R:VERB": 41,
105
+ "I-R:VERB:FORM": 34,
 
106
  "I-R:VERB:INFL": 8,
107
+ "I-R:VERB:SVA": 25,
108
  "I-R:VERB:TENSE": 3,
109
  "I-R:WO": 4,
110
  "I-U:NOUN": 12,
111
  "I-U:OTHER": 5,
112
+ "I-U:VERB": 32,
113
  "I-U:VERB:TENSE": 1,
114
+ "O": 28
115
  },
116
  "layer_norm_eps": 1e-12,
117
  "max_position_embeddings": 512,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:898ad17c5286bdcb31b773b2aaaa66a85e4495c16dcd5f78404e3a492b04d3d9
3
- size 435743736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab820c4efd09dcf60ee9c2b9bb3a9f9b68f8f81aefb1c047b73b40856c6465b6
3
+ size 435740660
runs/Dec12_22-43-39_a365e7b9463b/events.out.tfevents.1734043420.a365e7b9463b.6288.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fca3a7cf48b5b7749af360c691a761512f0152eaf9abacaab054cd4912c407
3
+ size 23264
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba559b60c5d760413ca4c35858fba48e69260c499152a0432bcbe8cfdd2f1d6
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0652214a44dd34e8b6c9dda375d7c44a9fb9ca509a02f4ca662f6432266dd72
3
  size 5304