Marcos12886 committed on
Commit
8fe0224
1 Parent(s): b8afdfb

Upload folder using huggingface_hub

Browse files
Files changed (42) hide show
  1. README.md +7 -10
  2. checkpoint-1600/config.json +85 -0
  3. checkpoint-1600/model.safetensors +3 -0
  4. checkpoint-1600/optimizer.pt +3 -0
  5. checkpoint-1600/rng_state.pth +3 -0
  6. checkpoint-1600/scheduler.pt +3 -0
  7. checkpoint-1600/trainer_state.json +570 -0
  8. checkpoint-1600/training_args.bin +3 -0
  9. checkpoint-1800/config.json +85 -0
  10. checkpoint-1800/model.safetensors +3 -0
  11. checkpoint-1800/optimizer.pt +3 -0
  12. checkpoint-1800/rng_state.pth +3 -0
  13. checkpoint-1800/scheduler.pt +3 -0
  14. checkpoint-1800/trainer_state.json +636 -0
  15. checkpoint-1800/training_args.bin +3 -0
  16. checkpoint-2000/config.json +85 -0
  17. checkpoint-2000/model.safetensors +3 -0
  18. checkpoint-2000/optimizer.pt +3 -0
  19. checkpoint-2000/rng_state.pth +3 -0
  20. checkpoint-2000/scheduler.pt +3 -0
  21. checkpoint-2000/trainer_state.json +702 -0
  22. checkpoint-2000/training_args.bin +3 -0
  23. checkpoint-2200/config.json +85 -0
  24. checkpoint-2200/model.safetensors +3 -0
  25. checkpoint-2200/optimizer.pt +3 -0
  26. checkpoint-2200/rng_state.pth +3 -0
  27. checkpoint-2200/scheduler.pt +3 -0
  28. checkpoint-2200/trainer_state.json +768 -0
  29. checkpoint-2200/training_args.bin +3 -0
  30. checkpoint-2400/config.json +85 -0
  31. checkpoint-2400/model.safetensors +3 -0
  32. checkpoint-2400/optimizer.pt +3 -0
  33. checkpoint-2400/rng_state.pth +3 -0
  34. checkpoint-2400/scheduler.pt +3 -0
  35. checkpoint-2400/trainer_state.json +834 -0
  36. checkpoint-2400/training_args.bin +3 -0
  37. model.safetensors +1 -1
  38. runs/Sep14_17-06-50_ubumarcos/events.out.tfevents.1726327100.ubumarcos +2 -2
  39. runs/Sep14_18-07-00_ubumarcos/events.out.tfevents.1726330021.ubumarcos +3 -0
  40. runs/Sep14_18-24-29_ubumarcos/events.out.tfevents.1726331070.ubumarcos +3 -0
  41. runs/Sep14_18-24-29_ubumarcos/events.out.tfevents.1726331984.ubumarcos +3 -0
  42. training_args.bin +1 -1
README.md CHANGED
@@ -21,12 +21,12 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.7942
25
- - Accuracy: 0.8242
26
- - F1: 0.8278
27
- - Precision: 0.8347
28
- - Recall: 0.8242
29
- - Confusion Matrix: [[51, 10, 0, 2], [5, 44, 9, 0], [1, 14, 67, 0], [7, 0, 0, 63]]
30
 
31
  ## Model description
32
 
@@ -54,14 +54,11 @@ The following hyperparameters were used during training:
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: cosine_with_restarts
56
  - lr_scheduler_warmup_ratio: 0.1
57
- - num_epochs: 30
58
  - label_smoothing_factor: 0.1
59
 
60
  ### Training results
61
 
62
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall | Confusion Matrix |
63
- |:-------------:|:-------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|:--------------------------------------------------------------:|
64
- | 0.3691 | 22.2222 | 100 | 0.7942 | 0.8242 | 0.8278 | 0.8347 | 0.8242 | [[51, 10, 0, 2], [5, 44, 9, 0], [1, 14, 67, 0], [7, 0, 0, 63]] |
65
 
66
 
67
  ### Framework versions
 
21
 
22
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.6498
25
+ - Accuracy: 0.8645
26
+ - F1: 0.8634
27
+ - Precision: 0.8632
28
+ - Recall: 0.8645
29
+ - Confusion Matrix: [[50, 8, 0, 2], [7, 53, 12, 0], [0, 8, 60, 0], [0, 0, 0, 73]]
30
 
31
  ## Model description
32
 
 
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: cosine_with_restarts
56
  - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 40
58
  - label_smoothing_factor: 0.1
59
 
60
  ### Training results
61
 
 
 
 
62
 
63
 
64
  ### Framework versions
checkpoint-1600/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-1600/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6aef261b8260f4ac3a6428f02db7c4bcca3a99bc5665fb8044d613ce39c6fb8
3
+ size 94765560
checkpoint-1600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d29862664d27f45e2f2994143a8262a93a21f00e9daf67dfc1cbc1691c418eae
3
+ size 189556666
checkpoint-1600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ef17f9c931698b6c110fc829b63100d0d1fc71bac548498af01bb1827cde472
3
+ size 14308
checkpoint-1600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddaa3a198579e44a64f9285103acba7ea329354b730d6cc3596b8951d6c763d2
3
+ size 1064
checkpoint-1600/trainer_state.json ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8462423027109934,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1400",
4
+ "epoch": 23.357664233576642,
5
+ "eval_steps": 200,
6
+ "global_step": 1600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7299270072992701,
13
+ "grad_norm": 2.289438486099243,
14
+ "learning_rate": 5.5147058823529414e-05,
15
+ "loss": 1.2878,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.4598540145985401,
20
+ "grad_norm": 2.5048491954803467,
21
+ "learning_rate": 0.00011029411764705883,
22
+ "loss": 0.8322,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 2.18978102189781,
27
+ "grad_norm": 11.18371295928955,
28
+ "learning_rate": 0.00016544117647058823,
29
+ "loss": 0.7897,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 2.9197080291970803,
34
+ "grad_norm": 9.702393531799316,
35
+ "learning_rate": 0.00022058823529411765,
36
+ "loss": 0.7149,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 2.9197080291970803,
41
+ "eval_accuracy": 0.7252747252747253,
42
+ "eval_confusion_matrix": [
43
+ [
44
+ 34,
45
+ 39,
46
+ 0,
47
+ 2
48
+ ],
49
+ [
50
+ 7,
51
+ 62,
52
+ 6,
53
+ 0
54
+ ],
55
+ [
56
+ 0,
57
+ 19,
58
+ 43,
59
+ 0
60
+ ],
61
+ [
62
+ 0,
63
+ 2,
64
+ 0,
65
+ 59
66
+ ]
67
+ ],
68
+ "eval_f1": 0.7260427659517454,
69
+ "eval_loss": 0.9058456420898438,
70
+ "eval_precision": 0.7828499608603893,
71
+ "eval_recall": 0.7252747252747253,
72
+ "eval_runtime": 3.7417,
73
+ "eval_samples_per_second": 72.962,
74
+ "eval_steps_per_second": 0.802,
75
+ "step": 200
76
+ },
77
+ {
78
+ "epoch": 3.6496350364963503,
79
+ "grad_norm": 8.47255802154541,
80
+ "learning_rate": 0.000275735294117647,
81
+ "loss": 0.6917,
82
+ "step": 250
83
+ },
84
+ {
85
+ "epoch": 4.37956204379562,
86
+ "grad_norm": 16.689321517944336,
87
+ "learning_rate": 0.0002999031705390845,
88
+ "loss": 0.7264,
89
+ "step": 300
90
+ },
91
+ {
92
+ "epoch": 5.109489051094891,
93
+ "grad_norm": 1.7369310855865479,
94
+ "learning_rate": 0.00029924913005299595,
95
+ "loss": 0.6895,
96
+ "step": 350
97
+ },
98
+ {
99
+ "epoch": 5.839416058394161,
100
+ "grad_norm": 2.210369348526001,
101
+ "learning_rate": 0.0002979807906935489,
102
+ "loss": 0.6939,
103
+ "step": 400
104
+ },
105
+ {
106
+ "epoch": 5.839416058394161,
107
+ "eval_accuracy": 0.7509157509157509,
108
+ "eval_confusion_matrix": [
109
+ [
110
+ 66,
111
+ 2,
112
+ 0,
113
+ 7
114
+ ],
115
+ [
116
+ 29,
117
+ 38,
118
+ 7,
119
+ 1
120
+ ],
121
+ [
122
+ 2,
123
+ 20,
124
+ 40,
125
+ 0
126
+ ],
127
+ [
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 61
132
+ ]
133
+ ],
134
+ "eval_f1": 0.7418721712792054,
135
+ "eval_loss": 0.8107791543006897,
136
+ "eval_precision": 0.7517378077426524,
137
+ "eval_recall": 0.7509157509157509,
138
+ "eval_runtime": 3.7702,
139
+ "eval_samples_per_second": 72.409,
140
+ "eval_steps_per_second": 0.796,
141
+ "step": 400
142
+ },
143
+ {
144
+ "epoch": 6.569343065693431,
145
+ "grad_norm": 2.1358511447906494,
146
+ "learning_rate": 0.000296103372855926,
147
+ "loss": 0.5986,
148
+ "step": 450
149
+ },
150
+ {
151
+ "epoch": 7.299270072992701,
152
+ "grad_norm": 13.704009056091309,
153
+ "learning_rate": 0.0002936246038592886,
154
+ "loss": 0.5932,
155
+ "step": 500
156
+ },
157
+ {
158
+ "epoch": 8.02919708029197,
159
+ "grad_norm": 2.032876968383789,
160
+ "learning_rate": 0.00029055468614167716,
161
+ "loss": 0.5633,
162
+ "step": 550
163
+ },
164
+ {
165
+ "epoch": 8.75912408759124,
166
+ "grad_norm": 28.525798797607422,
167
+ "learning_rate": 0.00028690625526749705,
168
+ "loss": 0.4941,
169
+ "step": 600
170
+ },
171
+ {
172
+ "epoch": 8.75912408759124,
173
+ "eval_accuracy": 0.8241758241758241,
174
+ "eval_confusion_matrix": [
175
+ [
176
+ 63,
177
+ 8,
178
+ 1,
179
+ 3
180
+ ],
181
+ [
182
+ 8,
183
+ 50,
184
+ 17,
185
+ 0
186
+ ],
187
+ [
188
+ 2,
189
+ 9,
190
+ 51,
191
+ 0
192
+ ],
193
+ [
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 61
198
+ ]
199
+ ],
200
+ "eval_f1": 0.8222676260809794,
201
+ "eval_loss": 0.7625077366828918,
202
+ "eval_precision": 0.8229409839103053,
203
+ "eval_recall": 0.8241758241758241,
204
+ "eval_runtime": 3.757,
205
+ "eval_samples_per_second": 72.664,
206
+ "eval_steps_per_second": 0.799,
207
+ "step": 600
208
+ },
209
+ {
210
+ "epoch": 9.489051094890511,
211
+ "grad_norm": 0.18371808528900146,
212
+ "learning_rate": 0.0002826943279204283,
213
+ "loss": 0.4842,
214
+ "step": 650
215
+ },
216
+ {
217
+ "epoch": 10.218978102189782,
218
+ "grad_norm": 11.426072120666504,
219
+ "learning_rate": 0.0002779362400958168,
220
+ "loss": 0.4352,
221
+ "step": 700
222
+ },
223
+ {
224
+ "epoch": 10.94890510948905,
225
+ "grad_norm": 8.062601089477539,
226
+ "learning_rate": 0.0002726515757469423,
227
+ "loss": 0.4447,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 11.678832116788321,
232
+ "grad_norm": 0.3985881805419922,
233
+ "learning_rate": 0.00026686208617885055,
234
+ "loss": 0.442,
235
+ "step": 800
236
+ },
237
+ {
238
+ "epoch": 11.678832116788321,
239
+ "eval_accuracy": 0.7985347985347986,
240
+ "eval_confusion_matrix": [
241
+ [
242
+ 66,
243
+ 6,
244
+ 1,
245
+ 2
246
+ ],
247
+ [
248
+ 15,
249
+ 32,
250
+ 26,
251
+ 2
252
+ ],
253
+ [
254
+ 2,
255
+ 1,
256
+ 59,
257
+ 0
258
+ ],
259
+ [
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 61
264
+ ]
265
+ ],
266
+ "eval_f1": 0.781170020153555,
267
+ "eval_loss": 0.9623217582702637,
268
+ "eval_precision": 0.8093701586901577,
269
+ "eval_recall": 0.7985347985347986,
270
+ "eval_runtime": 3.774,
271
+ "eval_samples_per_second": 72.337,
272
+ "eval_steps_per_second": 0.795,
273
+ "step": 800
274
+ },
275
+ {
276
+ "epoch": 12.408759124087592,
277
+ "grad_norm": 38.726985931396484,
278
+ "learning_rate": 0.0002605916005215186,
279
+ "loss": 0.4504,
280
+ "step": 850
281
+ },
282
+ {
283
+ "epoch": 13.138686131386862,
284
+ "grad_norm": 0.026563748717308044,
285
+ "learning_rate": 0.0002538659276508397,
286
+ "loss": 0.3903,
287
+ "step": 900
288
+ },
289
+ {
290
+ "epoch": 13.86861313868613,
291
+ "grad_norm": 0.06770322471857071,
292
+ "learning_rate": 0.0002467127499611136,
293
+ "loss": 0.4094,
294
+ "step": 950
295
+ },
296
+ {
297
+ "epoch": 14.598540145985401,
298
+ "grad_norm": 1.2612749338150024,
299
+ "learning_rate": 0.00023916150942626798,
300
+ "loss": 0.4188,
301
+ "step": 1000
302
+ },
303
+ {
304
+ "epoch": 14.598540145985401,
305
+ "eval_accuracy": 0.8315018315018315,
306
+ "eval_confusion_matrix": [
307
+ [
308
+ 60,
309
+ 9,
310
+ 2,
311
+ 4
312
+ ],
313
+ [
314
+ 8,
315
+ 56,
316
+ 11,
317
+ 0
318
+ ],
319
+ [
320
+ 1,
321
+ 11,
322
+ 50,
323
+ 0
324
+ ],
325
+ [
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 61
330
+ ]
331
+ ],
332
+ "eval_f1": 0.8307422385946511,
333
+ "eval_loss": 0.8534455299377441,
334
+ "eval_precision": 0.8312566016541674,
335
+ "eval_recall": 0.8315018315018315,
336
+ "eval_runtime": 3.796,
337
+ "eval_samples_per_second": 71.917,
338
+ "eval_steps_per_second": 0.79,
339
+ "step": 1000
340
+ },
341
+ {
342
+ "epoch": 15.328467153284672,
343
+ "grad_norm": 28.980899810791016,
344
+ "learning_rate": 0.0002312432864187738,
345
+ "loss": 0.3798,
346
+ "step": 1050
347
+ },
348
+ {
349
+ "epoch": 16.05839416058394,
350
+ "grad_norm": 0.022609323263168335,
351
+ "learning_rate": 0.0002229906717850284,
352
+ "loss": 0.3672,
353
+ "step": 1100
354
+ },
355
+ {
356
+ "epoch": 16.78832116788321,
357
+ "grad_norm": 0.02360348217189312,
358
+ "learning_rate": 0.00021443763270373483,
359
+ "loss": 0.3715,
360
+ "step": 1150
361
+ },
362
+ {
363
+ "epoch": 17.51824817518248,
364
+ "grad_norm": 0.014020542614161968,
365
+ "learning_rate": 0.0002056193728793941,
366
+ "loss": 0.349,
367
+ "step": 1200
368
+ },
369
+ {
370
+ "epoch": 17.51824817518248,
371
+ "eval_accuracy": 0.8351648351648352,
372
+ "eval_confusion_matrix": [
373
+ [
374
+ 62,
375
+ 10,
376
+ 1,
377
+ 2
378
+ ],
379
+ [
380
+ 9,
381
+ 57,
382
+ 9,
383
+ 0
384
+ ],
385
+ [
386
+ 2,
387
+ 12,
388
+ 48,
389
+ 0
390
+ ],
391
+ [
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 61
396
+ ]
397
+ ],
398
+ "eval_f1": 0.8350675728555914,
399
+ "eval_loss": 0.8131950497627258,
400
+ "eval_precision": 0.8358475863688551,
401
+ "eval_recall": 0.8351648351648352,
402
+ "eval_runtime": 3.7788,
403
+ "eval_samples_per_second": 72.246,
404
+ "eval_steps_per_second": 0.794,
405
+ "step": 1200
406
+ },
407
+ {
408
+ "epoch": 18.248175182481752,
409
+ "grad_norm": 0.006028232164680958,
410
+ "learning_rate": 0.0001965721876463452,
411
+ "loss": 0.3491,
412
+ "step": 1250
413
+ },
414
+ {
415
+ "epoch": 18.978102189781023,
416
+ "grad_norm": 0.008285734802484512,
417
+ "learning_rate": 0.00018733331457973358,
418
+ "loss": 0.3489,
419
+ "step": 1300
420
+ },
421
+ {
422
+ "epoch": 19.708029197080293,
423
+ "grad_norm": 0.008053851313889027,
424
+ "learning_rate": 0.00017794078022828275,
425
+ "loss": 0.3497,
426
+ "step": 1350
427
+ },
428
+ {
429
+ "epoch": 20.437956204379564,
430
+ "grad_norm": 0.003234422067180276,
431
+ "learning_rate": 0.00016843324359970712,
432
+ "loss": 0.3488,
433
+ "step": 1400
434
+ },
435
+ {
436
+ "epoch": 20.437956204379564,
437
+ "eval_accuracy": 0.8461538461538461,
438
+ "eval_confusion_matrix": [
439
+ [
440
+ 61,
441
+ 11,
442
+ 1,
443
+ 2
444
+ ],
445
+ [
446
+ 8,
447
+ 57,
448
+ 10,
449
+ 0
450
+ ],
451
+ [
452
+ 0,
453
+ 10,
454
+ 52,
455
+ 0
456
+ ],
457
+ [
458
+ 0,
459
+ 0,
460
+ 0,
461
+ 61
462
+ ]
463
+ ],
464
+ "eval_f1": 0.8462423027109934,
465
+ "eval_loss": 0.7859560251235962,
466
+ "eval_precision": 0.8474363933035696,
467
+ "eval_recall": 0.8461538461538461,
468
+ "eval_runtime": 3.7947,
469
+ "eval_samples_per_second": 71.942,
470
+ "eval_steps_per_second": 0.791,
471
+ "step": 1400
472
+ },
473
+ {
474
+ "epoch": 21.16788321167883,
475
+ "grad_norm": 0.004595920909196138,
476
+ "learning_rate": 0.00015884983704296757,
477
+ "loss": 0.3488,
478
+ "step": 1450
479
+ },
480
+ {
481
+ "epoch": 21.8978102189781,
482
+ "grad_norm": 0.002511706668883562,
483
+ "learning_rate": 0.00014923000518228847,
484
+ "loss": 0.3488,
485
+ "step": 1500
486
+ },
487
+ {
488
+ "epoch": 22.62773722627737,
489
+ "grad_norm": 0.002340014325454831,
490
+ "learning_rate": 0.00013961334256587125,
491
+ "loss": 0.3488,
492
+ "step": 1550
493
+ },
494
+ {
495
+ "epoch": 23.357664233576642,
496
+ "grad_norm": 0.0028287076856940985,
497
+ "learning_rate": 0.00013003943069753198,
498
+ "loss": 0.3488,
499
+ "step": 1600
500
+ },
501
+ {
502
+ "epoch": 23.357664233576642,
503
+ "eval_accuracy": 0.8461538461538461,
504
+ "eval_confusion_matrix": [
505
+ [
506
+ 61,
507
+ 11,
508
+ 1,
509
+ 2
510
+ ],
511
+ [
512
+ 8,
513
+ 57,
514
+ 10,
515
+ 0
516
+ ],
517
+ [
518
+ 0,
519
+ 10,
520
+ 52,
521
+ 0
522
+ ],
523
+ [
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 61
528
+ ]
529
+ ],
530
+ "eval_f1": 0.8462423027109934,
531
+ "eval_loss": 0.7856015563011169,
532
+ "eval_precision": 0.8474363933035696,
533
+ "eval_recall": 0.8461538461538461,
534
+ "eval_runtime": 3.7861,
535
+ "eval_samples_per_second": 72.105,
536
+ "eval_steps_per_second": 0.792,
537
+ "step": 1600
538
+ }
539
+ ],
540
+ "logging_steps": 50,
541
+ "max_steps": 2720,
542
+ "num_input_tokens_seen": 0,
543
+ "num_train_epochs": 40,
544
+ "save_steps": 200,
545
+ "stateful_callbacks": {
546
+ "EarlyStoppingCallback": {
547
+ "args": {
548
+ "early_stopping_patience": 5,
549
+ "early_stopping_threshold": 0.001
550
+ },
551
+ "attributes": {
552
+ "early_stopping_patience_counter": 0
553
+ }
554
+ },
555
+ "TrainerControl": {
556
+ "args": {
557
+ "should_epoch_stop": false,
558
+ "should_evaluate": false,
559
+ "should_log": false,
560
+ "should_save": true,
561
+ "should_training_stop": false
562
+ },
563
+ "attributes": {}
564
+ }
565
+ },
566
+ "total_flos": 5.790635667072e+16,
567
+ "train_batch_size": 8,
568
+ "trial_name": null,
569
+ "trial_params": null
570
+ }
checkpoint-1600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cd6053c9e08edd46715ea6144f2b03c4f4ac31b547b8ae042d0b5e4a21ad64
3
+ size 5240
checkpoint-1800/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-1800/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3892e9230bb2f7ce41c962ffb147c373f6606471a9ef74da585894adff1bfb70
3
+ size 94765560
checkpoint-1800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41acf717dcb1a8f15aa79d79b64f778e5f5dd6e316eec73482e77fb1177cb39c
3
+ size 189556666
checkpoint-1800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c4142f79ad6a7fc590ff6315301e758c94ec69a4bddf4dbd0f59cb647a60bed
3
+ size 14308
checkpoint-1800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64558c90d8f26d69b3225a367b121a3b1f071de723e5c1c651fdece834417a36
3
+ size 1064
checkpoint-1800/trainer_state.json ADDED
@@ -0,0 +1,636 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8535663673078441,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1800",
4
+ "epoch": 26.277372262773724,
5
+ "eval_steps": 200,
6
+ "global_step": 1800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7299270072992701,
13
+ "grad_norm": 2.289438486099243,
14
+ "learning_rate": 5.5147058823529414e-05,
15
+ "loss": 1.2878,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.4598540145985401,
20
+ "grad_norm": 2.5048491954803467,
21
+ "learning_rate": 0.00011029411764705883,
22
+ "loss": 0.8322,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 2.18978102189781,
27
+ "grad_norm": 11.18371295928955,
28
+ "learning_rate": 0.00016544117647058823,
29
+ "loss": 0.7897,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 2.9197080291970803,
34
+ "grad_norm": 9.702393531799316,
35
+ "learning_rate": 0.00022058823529411765,
36
+ "loss": 0.7149,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 2.9197080291970803,
41
+ "eval_accuracy": 0.7252747252747253,
42
+ "eval_confusion_matrix": [
43
+ [
44
+ 34,
45
+ 39,
46
+ 0,
47
+ 2
48
+ ],
49
+ [
50
+ 7,
51
+ 62,
52
+ 6,
53
+ 0
54
+ ],
55
+ [
56
+ 0,
57
+ 19,
58
+ 43,
59
+ 0
60
+ ],
61
+ [
62
+ 0,
63
+ 2,
64
+ 0,
65
+ 59
66
+ ]
67
+ ],
68
+ "eval_f1": 0.7260427659517454,
69
+ "eval_loss": 0.9058456420898438,
70
+ "eval_precision": 0.7828499608603893,
71
+ "eval_recall": 0.7252747252747253,
72
+ "eval_runtime": 3.7417,
73
+ "eval_samples_per_second": 72.962,
74
+ "eval_steps_per_second": 0.802,
75
+ "step": 200
76
+ },
77
+ {
78
+ "epoch": 3.6496350364963503,
79
+ "grad_norm": 8.47255802154541,
80
+ "learning_rate": 0.000275735294117647,
81
+ "loss": 0.6917,
82
+ "step": 250
83
+ },
84
+ {
85
+ "epoch": 4.37956204379562,
86
+ "grad_norm": 16.689321517944336,
87
+ "learning_rate": 0.0002999031705390845,
88
+ "loss": 0.7264,
89
+ "step": 300
90
+ },
91
+ {
92
+ "epoch": 5.109489051094891,
93
+ "grad_norm": 1.7369310855865479,
94
+ "learning_rate": 0.00029924913005299595,
95
+ "loss": 0.6895,
96
+ "step": 350
97
+ },
98
+ {
99
+ "epoch": 5.839416058394161,
100
+ "grad_norm": 2.210369348526001,
101
+ "learning_rate": 0.0002979807906935489,
102
+ "loss": 0.6939,
103
+ "step": 400
104
+ },
105
+ {
106
+ "epoch": 5.839416058394161,
107
+ "eval_accuracy": 0.7509157509157509,
108
+ "eval_confusion_matrix": [
109
+ [
110
+ 66,
111
+ 2,
112
+ 0,
113
+ 7
114
+ ],
115
+ [
116
+ 29,
117
+ 38,
118
+ 7,
119
+ 1
120
+ ],
121
+ [
122
+ 2,
123
+ 20,
124
+ 40,
125
+ 0
126
+ ],
127
+ [
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 61
132
+ ]
133
+ ],
134
+ "eval_f1": 0.7418721712792054,
135
+ "eval_loss": 0.8107791543006897,
136
+ "eval_precision": 0.7517378077426524,
137
+ "eval_recall": 0.7509157509157509,
138
+ "eval_runtime": 3.7702,
139
+ "eval_samples_per_second": 72.409,
140
+ "eval_steps_per_second": 0.796,
141
+ "step": 400
142
+ },
143
+ {
144
+ "epoch": 6.569343065693431,
145
+ "grad_norm": 2.1358511447906494,
146
+ "learning_rate": 0.000296103372855926,
147
+ "loss": 0.5986,
148
+ "step": 450
149
+ },
150
+ {
151
+ "epoch": 7.299270072992701,
152
+ "grad_norm": 13.704009056091309,
153
+ "learning_rate": 0.0002936246038592886,
154
+ "loss": 0.5932,
155
+ "step": 500
156
+ },
157
+ {
158
+ "epoch": 8.02919708029197,
159
+ "grad_norm": 2.032876968383789,
160
+ "learning_rate": 0.00029055468614167716,
161
+ "loss": 0.5633,
162
+ "step": 550
163
+ },
164
+ {
165
+ "epoch": 8.75912408759124,
166
+ "grad_norm": 28.525798797607422,
167
+ "learning_rate": 0.00028690625526749705,
168
+ "loss": 0.4941,
169
+ "step": 600
170
+ },
171
+ {
172
+ "epoch": 8.75912408759124,
173
+ "eval_accuracy": 0.8241758241758241,
174
+ "eval_confusion_matrix": [
175
+ [
176
+ 63,
177
+ 8,
178
+ 1,
179
+ 3
180
+ ],
181
+ [
182
+ 8,
183
+ 50,
184
+ 17,
185
+ 0
186
+ ],
187
+ [
188
+ 2,
189
+ 9,
190
+ 51,
191
+ 0
192
+ ],
193
+ [
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 61
198
+ ]
199
+ ],
200
+ "eval_f1": 0.8222676260809794,
201
+ "eval_loss": 0.7625077366828918,
202
+ "eval_precision": 0.8229409839103053,
203
+ "eval_recall": 0.8241758241758241,
204
+ "eval_runtime": 3.757,
205
+ "eval_samples_per_second": 72.664,
206
+ "eval_steps_per_second": 0.799,
207
+ "step": 600
208
+ },
209
+ {
210
+ "epoch": 9.489051094890511,
211
+ "grad_norm": 0.18371808528900146,
212
+ "learning_rate": 0.0002826943279204283,
213
+ "loss": 0.4842,
214
+ "step": 650
215
+ },
216
+ {
217
+ "epoch": 10.218978102189782,
218
+ "grad_norm": 11.426072120666504,
219
+ "learning_rate": 0.0002779362400958168,
220
+ "loss": 0.4352,
221
+ "step": 700
222
+ },
223
+ {
224
+ "epoch": 10.94890510948905,
225
+ "grad_norm": 8.062601089477539,
226
+ "learning_rate": 0.0002726515757469423,
227
+ "loss": 0.4447,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 11.678832116788321,
232
+ "grad_norm": 0.3985881805419922,
233
+ "learning_rate": 0.00026686208617885055,
234
+ "loss": 0.442,
235
+ "step": 800
236
+ },
237
+ {
238
+ "epoch": 11.678832116788321,
239
+ "eval_accuracy": 0.7985347985347986,
240
+ "eval_confusion_matrix": [
241
+ [
242
+ 66,
243
+ 6,
244
+ 1,
245
+ 2
246
+ ],
247
+ [
248
+ 15,
249
+ 32,
250
+ 26,
251
+ 2
252
+ ],
253
+ [
254
+ 2,
255
+ 1,
256
+ 59,
257
+ 0
258
+ ],
259
+ [
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 61
264
+ ]
265
+ ],
266
+ "eval_f1": 0.781170020153555,
267
+ "eval_loss": 0.9623217582702637,
268
+ "eval_precision": 0.8093701586901577,
269
+ "eval_recall": 0.7985347985347986,
270
+ "eval_runtime": 3.774,
271
+ "eval_samples_per_second": 72.337,
272
+ "eval_steps_per_second": 0.795,
273
+ "step": 800
274
+ },
275
+ {
276
+ "epoch": 12.408759124087592,
277
+ "grad_norm": 38.726985931396484,
278
+ "learning_rate": 0.0002605916005215186,
279
+ "loss": 0.4504,
280
+ "step": 850
281
+ },
282
+ {
283
+ "epoch": 13.138686131386862,
284
+ "grad_norm": 0.026563748717308044,
285
+ "learning_rate": 0.0002538659276508397,
286
+ "loss": 0.3903,
287
+ "step": 900
288
+ },
289
+ {
290
+ "epoch": 13.86861313868613,
291
+ "grad_norm": 0.06770322471857071,
292
+ "learning_rate": 0.0002467127499611136,
293
+ "loss": 0.4094,
294
+ "step": 950
295
+ },
296
+ {
297
+ "epoch": 14.598540145985401,
298
+ "grad_norm": 1.2612749338150024,
299
+ "learning_rate": 0.00023916150942626798,
300
+ "loss": 0.4188,
301
+ "step": 1000
302
+ },
303
+ {
304
+ "epoch": 14.598540145985401,
305
+ "eval_accuracy": 0.8315018315018315,
306
+ "eval_confusion_matrix": [
307
+ [
308
+ 60,
309
+ 9,
310
+ 2,
311
+ 4
312
+ ],
313
+ [
314
+ 8,
315
+ 56,
316
+ 11,
317
+ 0
318
+ ],
319
+ [
320
+ 1,
321
+ 11,
322
+ 50,
323
+ 0
324
+ ],
325
+ [
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 61
330
+ ]
331
+ ],
332
+ "eval_f1": 0.8307422385946511,
333
+ "eval_loss": 0.8534455299377441,
334
+ "eval_precision": 0.8312566016541674,
335
+ "eval_recall": 0.8315018315018315,
336
+ "eval_runtime": 3.796,
337
+ "eval_samples_per_second": 71.917,
338
+ "eval_steps_per_second": 0.79,
339
+ "step": 1000
340
+ },
341
+ {
342
+ "epoch": 15.328467153284672,
343
+ "grad_norm": 28.980899810791016,
344
+ "learning_rate": 0.0002312432864187738,
345
+ "loss": 0.3798,
346
+ "step": 1050
347
+ },
348
+ {
349
+ "epoch": 16.05839416058394,
350
+ "grad_norm": 0.022609323263168335,
351
+ "learning_rate": 0.0002229906717850284,
352
+ "loss": 0.3672,
353
+ "step": 1100
354
+ },
355
+ {
356
+ "epoch": 16.78832116788321,
357
+ "grad_norm": 0.02360348217189312,
358
+ "learning_rate": 0.00021443763270373483,
359
+ "loss": 0.3715,
360
+ "step": 1150
361
+ },
362
+ {
363
+ "epoch": 17.51824817518248,
364
+ "grad_norm": 0.014020542614161968,
365
+ "learning_rate": 0.0002056193728793941,
366
+ "loss": 0.349,
367
+ "step": 1200
368
+ },
369
+ {
370
+ "epoch": 17.51824817518248,
371
+ "eval_accuracy": 0.8351648351648352,
372
+ "eval_confusion_matrix": [
373
+ [
374
+ 62,
375
+ 10,
376
+ 1,
377
+ 2
378
+ ],
379
+ [
380
+ 9,
381
+ 57,
382
+ 9,
383
+ 0
384
+ ],
385
+ [
386
+ 2,
387
+ 12,
388
+ 48,
389
+ 0
390
+ ],
391
+ [
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 61
396
+ ]
397
+ ],
398
+ "eval_f1": 0.8350675728555914,
399
+ "eval_loss": 0.8131950497627258,
400
+ "eval_precision": 0.8358475863688551,
401
+ "eval_recall": 0.8351648351648352,
402
+ "eval_runtime": 3.7788,
403
+ "eval_samples_per_second": 72.246,
404
+ "eval_steps_per_second": 0.794,
405
+ "step": 1200
406
+ },
407
+ {
408
+ "epoch": 18.248175182481752,
409
+ "grad_norm": 0.006028232164680958,
410
+ "learning_rate": 0.0001965721876463452,
411
+ "loss": 0.3491,
412
+ "step": 1250
413
+ },
414
+ {
415
+ "epoch": 18.978102189781023,
416
+ "grad_norm": 0.008285734802484512,
417
+ "learning_rate": 0.00018733331457973358,
418
+ "loss": 0.3489,
419
+ "step": 1300
420
+ },
421
+ {
422
+ "epoch": 19.708029197080293,
423
+ "grad_norm": 0.008053851313889027,
424
+ "learning_rate": 0.00017794078022828275,
425
+ "loss": 0.3497,
426
+ "step": 1350
427
+ },
428
+ {
429
+ "epoch": 20.437956204379564,
430
+ "grad_norm": 0.003234422067180276,
431
+ "learning_rate": 0.00016843324359970712,
432
+ "loss": 0.3488,
433
+ "step": 1400
434
+ },
435
+ {
436
+ "epoch": 20.437956204379564,
437
+ "eval_accuracy": 0.8461538461538461,
438
+ "eval_confusion_matrix": [
439
+ [
440
+ 61,
441
+ 11,
442
+ 1,
443
+ 2
444
+ ],
445
+ [
446
+ 8,
447
+ 57,
448
+ 10,
449
+ 0
450
+ ],
451
+ [
452
+ 0,
453
+ 10,
454
+ 52,
455
+ 0
456
+ ],
457
+ [
458
+ 0,
459
+ 0,
460
+ 0,
461
+ 61
462
+ ]
463
+ ],
464
+ "eval_f1": 0.8462423027109934,
465
+ "eval_loss": 0.7859560251235962,
466
+ "eval_precision": 0.8474363933035696,
467
+ "eval_recall": 0.8461538461538461,
468
+ "eval_runtime": 3.7947,
469
+ "eval_samples_per_second": 71.942,
470
+ "eval_steps_per_second": 0.791,
471
+ "step": 1400
472
+ },
473
+ {
474
+ "epoch": 21.16788321167883,
475
+ "grad_norm": 0.004595920909196138,
476
+ "learning_rate": 0.00015884983704296757,
477
+ "loss": 0.3488,
478
+ "step": 1450
479
+ },
480
+ {
481
+ "epoch": 21.8978102189781,
482
+ "grad_norm": 0.002511706668883562,
483
+ "learning_rate": 0.00014923000518228847,
484
+ "loss": 0.3488,
485
+ "step": 1500
486
+ },
487
+ {
488
+ "epoch": 22.62773722627737,
489
+ "grad_norm": 0.002340014325454831,
490
+ "learning_rate": 0.00013961334256587125,
491
+ "loss": 0.3488,
492
+ "step": 1550
493
+ },
494
+ {
495
+ "epoch": 23.357664233576642,
496
+ "grad_norm": 0.0028287076856940985,
497
+ "learning_rate": 0.00013003943069753198,
498
+ "loss": 0.3488,
499
+ "step": 1600
500
+ },
501
+ {
502
+ "epoch": 23.357664233576642,
503
+ "eval_accuracy": 0.8461538461538461,
504
+ "eval_confusion_matrix": [
505
+ [
506
+ 61,
507
+ 11,
508
+ 1,
509
+ 2
510
+ ],
511
+ [
512
+ 8,
513
+ 57,
514
+ 10,
515
+ 0
516
+ ],
517
+ [
518
+ 0,
519
+ 10,
520
+ 52,
521
+ 0
522
+ ],
523
+ [
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 61
528
+ ]
529
+ ],
530
+ "eval_f1": 0.8462423027109934,
531
+ "eval_loss": 0.7856015563011169,
532
+ "eval_precision": 0.8474363933035696,
533
+ "eval_recall": 0.8461538461538461,
534
+ "eval_runtime": 3.7861,
535
+ "eval_samples_per_second": 72.105,
536
+ "eval_steps_per_second": 0.792,
537
+ "step": 1600
538
+ },
539
+ {
540
+ "epoch": 24.087591240875913,
541
+ "grad_norm": 0.0027960864827036858,
542
+ "learning_rate": 0.00012054767512202832,
543
+ "loss": 0.3488,
544
+ "step": 1650
545
+ },
546
+ {
547
+ "epoch": 24.817518248175183,
548
+ "grad_norm": 0.0033820979297161102,
549
+ "learning_rate": 0.00011117714323462186,
550
+ "loss": 0.3488,
551
+ "step": 1700
552
+ },
553
+ {
554
+ "epoch": 25.547445255474454,
555
+ "grad_norm": 0.0034969367552548647,
556
+ "learning_rate": 0.00010196640348243974,
557
+ "loss": 0.3488,
558
+ "step": 1750
559
+ },
560
+ {
561
+ "epoch": 26.277372262773724,
562
+ "grad_norm": 0.0014958898536860943,
563
+ "learning_rate": 9.295336661947115e-05,
564
+ "loss": 0.3488,
565
+ "step": 1800
566
+ },
567
+ {
568
+ "epoch": 26.277372262773724,
569
+ "eval_accuracy": 0.8534798534798534,
570
+ "eval_confusion_matrix": [
571
+ [
572
+ 61,
573
+ 11,
574
+ 1,
575
+ 2
576
+ ],
577
+ [
578
+ 7,
579
+ 58,
580
+ 10,
581
+ 0
582
+ ],
583
+ [
584
+ 0,
585
+ 9,
586
+ 53,
587
+ 0
588
+ ],
589
+ [
590
+ 0,
591
+ 0,
592
+ 0,
593
+ 61
594
+ ]
595
+ ],
596
+ "eval_f1": 0.8535663673078441,
597
+ "eval_loss": 0.7831193804740906,
598
+ "eval_precision": 0.8551497604301419,
599
+ "eval_recall": 0.8534798534798534,
600
+ "eval_runtime": 3.7976,
601
+ "eval_samples_per_second": 71.888,
602
+ "eval_steps_per_second": 0.79,
603
+ "step": 1800
604
+ }
605
+ ],
606
+ "logging_steps": 50,
607
+ "max_steps": 2720,
608
+ "num_input_tokens_seen": 0,
609
+ "num_train_epochs": 40,
610
+ "save_steps": 200,
611
+ "stateful_callbacks": {
612
+ "EarlyStoppingCallback": {
613
+ "args": {
614
+ "early_stopping_patience": 5,
615
+ "early_stopping_threshold": 0.001
616
+ },
617
+ "attributes": {
618
+ "early_stopping_patience_counter": 0
619
+ }
620
+ },
621
+ "TrainerControl": {
622
+ "args": {
623
+ "should_epoch_stop": false,
624
+ "should_evaluate": false,
625
+ "should_log": false,
626
+ "should_save": true,
627
+ "should_training_stop": false
628
+ },
629
+ "attributes": {}
630
+ }
631
+ },
632
+ "total_flos": 6.514294558464e+16,
633
+ "train_batch_size": 8,
634
+ "trial_name": null,
635
+ "trial_params": null
636
+ }
checkpoint-1800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cd6053c9e08edd46715ea6144f2b03c4f4ac31b547b8ae042d0b5e4a21ad64
3
+ size 5240
checkpoint-2000/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fa71495d44ac6eb2b2936ce624cab10e3a56f3350942326bf8ebe031bf375b2
3
+ size 94765560
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117dd1e658d23c2a6479a784272551423c67159733df85da2aa7fca20ddda9ce
3
+ size 189556666
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d0b969a93825609a509fcaba6d3524f8e7e0e5c183dc65390e343f717261d8
3
+ size 14308
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c93d911c49d2a76eb941abe8dfcb1f8e265e872f188a8d3a69f77a40a24421e8
3
+ size 1064
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8535663673078441,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1800",
4
+ "epoch": 29.197080291970803,
5
+ "eval_steps": 200,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7299270072992701,
13
+ "grad_norm": 2.289438486099243,
14
+ "learning_rate": 5.5147058823529414e-05,
15
+ "loss": 1.2878,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.4598540145985401,
20
+ "grad_norm": 2.5048491954803467,
21
+ "learning_rate": 0.00011029411764705883,
22
+ "loss": 0.8322,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 2.18978102189781,
27
+ "grad_norm": 11.18371295928955,
28
+ "learning_rate": 0.00016544117647058823,
29
+ "loss": 0.7897,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 2.9197080291970803,
34
+ "grad_norm": 9.702393531799316,
35
+ "learning_rate": 0.00022058823529411765,
36
+ "loss": 0.7149,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 2.9197080291970803,
41
+ "eval_accuracy": 0.7252747252747253,
42
+ "eval_confusion_matrix": [
43
+ [
44
+ 34,
45
+ 39,
46
+ 0,
47
+ 2
48
+ ],
49
+ [
50
+ 7,
51
+ 62,
52
+ 6,
53
+ 0
54
+ ],
55
+ [
56
+ 0,
57
+ 19,
58
+ 43,
59
+ 0
60
+ ],
61
+ [
62
+ 0,
63
+ 2,
64
+ 0,
65
+ 59
66
+ ]
67
+ ],
68
+ "eval_f1": 0.7260427659517454,
69
+ "eval_loss": 0.9058456420898438,
70
+ "eval_precision": 0.7828499608603893,
71
+ "eval_recall": 0.7252747252747253,
72
+ "eval_runtime": 3.7417,
73
+ "eval_samples_per_second": 72.962,
74
+ "eval_steps_per_second": 0.802,
75
+ "step": 200
76
+ },
77
+ {
78
+ "epoch": 3.6496350364963503,
79
+ "grad_norm": 8.47255802154541,
80
+ "learning_rate": 0.000275735294117647,
81
+ "loss": 0.6917,
82
+ "step": 250
83
+ },
84
+ {
85
+ "epoch": 4.37956204379562,
86
+ "grad_norm": 16.689321517944336,
87
+ "learning_rate": 0.0002999031705390845,
88
+ "loss": 0.7264,
89
+ "step": 300
90
+ },
91
+ {
92
+ "epoch": 5.109489051094891,
93
+ "grad_norm": 1.7369310855865479,
94
+ "learning_rate": 0.00029924913005299595,
95
+ "loss": 0.6895,
96
+ "step": 350
97
+ },
98
+ {
99
+ "epoch": 5.839416058394161,
100
+ "grad_norm": 2.210369348526001,
101
+ "learning_rate": 0.0002979807906935489,
102
+ "loss": 0.6939,
103
+ "step": 400
104
+ },
105
+ {
106
+ "epoch": 5.839416058394161,
107
+ "eval_accuracy": 0.7509157509157509,
108
+ "eval_confusion_matrix": [
109
+ [
110
+ 66,
111
+ 2,
112
+ 0,
113
+ 7
114
+ ],
115
+ [
116
+ 29,
117
+ 38,
118
+ 7,
119
+ 1
120
+ ],
121
+ [
122
+ 2,
123
+ 20,
124
+ 40,
125
+ 0
126
+ ],
127
+ [
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 61
132
+ ]
133
+ ],
134
+ "eval_f1": 0.7418721712792054,
135
+ "eval_loss": 0.8107791543006897,
136
+ "eval_precision": 0.7517378077426524,
137
+ "eval_recall": 0.7509157509157509,
138
+ "eval_runtime": 3.7702,
139
+ "eval_samples_per_second": 72.409,
140
+ "eval_steps_per_second": 0.796,
141
+ "step": 400
142
+ },
143
+ {
144
+ "epoch": 6.569343065693431,
145
+ "grad_norm": 2.1358511447906494,
146
+ "learning_rate": 0.000296103372855926,
147
+ "loss": 0.5986,
148
+ "step": 450
149
+ },
150
+ {
151
+ "epoch": 7.299270072992701,
152
+ "grad_norm": 13.704009056091309,
153
+ "learning_rate": 0.0002936246038592886,
154
+ "loss": 0.5932,
155
+ "step": 500
156
+ },
157
+ {
158
+ "epoch": 8.02919708029197,
159
+ "grad_norm": 2.032876968383789,
160
+ "learning_rate": 0.00029055468614167716,
161
+ "loss": 0.5633,
162
+ "step": 550
163
+ },
164
+ {
165
+ "epoch": 8.75912408759124,
166
+ "grad_norm": 28.525798797607422,
167
+ "learning_rate": 0.00028690625526749705,
168
+ "loss": 0.4941,
169
+ "step": 600
170
+ },
171
+ {
172
+ "epoch": 8.75912408759124,
173
+ "eval_accuracy": 0.8241758241758241,
174
+ "eval_confusion_matrix": [
175
+ [
176
+ 63,
177
+ 8,
178
+ 1,
179
+ 3
180
+ ],
181
+ [
182
+ 8,
183
+ 50,
184
+ 17,
185
+ 0
186
+ ],
187
+ [
188
+ 2,
189
+ 9,
190
+ 51,
191
+ 0
192
+ ],
193
+ [
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 61
198
+ ]
199
+ ],
200
+ "eval_f1": 0.8222676260809794,
201
+ "eval_loss": 0.7625077366828918,
202
+ "eval_precision": 0.8229409839103053,
203
+ "eval_recall": 0.8241758241758241,
204
+ "eval_runtime": 3.757,
205
+ "eval_samples_per_second": 72.664,
206
+ "eval_steps_per_second": 0.799,
207
+ "step": 600
208
+ },
209
+ {
210
+ "epoch": 9.489051094890511,
211
+ "grad_norm": 0.18371808528900146,
212
+ "learning_rate": 0.0002826943279204283,
213
+ "loss": 0.4842,
214
+ "step": 650
215
+ },
216
+ {
217
+ "epoch": 10.218978102189782,
218
+ "grad_norm": 11.426072120666504,
219
+ "learning_rate": 0.0002779362400958168,
220
+ "loss": 0.4352,
221
+ "step": 700
222
+ },
223
+ {
224
+ "epoch": 10.94890510948905,
225
+ "grad_norm": 8.062601089477539,
226
+ "learning_rate": 0.0002726515757469423,
227
+ "loss": 0.4447,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 11.678832116788321,
232
+ "grad_norm": 0.3985881805419922,
233
+ "learning_rate": 0.00026686208617885055,
234
+ "loss": 0.442,
235
+ "step": 800
236
+ },
237
+ {
238
+ "epoch": 11.678832116788321,
239
+ "eval_accuracy": 0.7985347985347986,
240
+ "eval_confusion_matrix": [
241
+ [
242
+ 66,
243
+ 6,
244
+ 1,
245
+ 2
246
+ ],
247
+ [
248
+ 15,
249
+ 32,
250
+ 26,
251
+ 2
252
+ ],
253
+ [
254
+ 2,
255
+ 1,
256
+ 59,
257
+ 0
258
+ ],
259
+ [
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 61
264
+ ]
265
+ ],
266
+ "eval_f1": 0.781170020153555,
267
+ "eval_loss": 0.9623217582702637,
268
+ "eval_precision": 0.8093701586901577,
269
+ "eval_recall": 0.7985347985347986,
270
+ "eval_runtime": 3.774,
271
+ "eval_samples_per_second": 72.337,
272
+ "eval_steps_per_second": 0.795,
273
+ "step": 800
274
+ },
275
+ {
276
+ "epoch": 12.408759124087592,
277
+ "grad_norm": 38.726985931396484,
278
+ "learning_rate": 0.0002605916005215186,
279
+ "loss": 0.4504,
280
+ "step": 850
281
+ },
282
+ {
283
+ "epoch": 13.138686131386862,
284
+ "grad_norm": 0.026563748717308044,
285
+ "learning_rate": 0.0002538659276508397,
286
+ "loss": 0.3903,
287
+ "step": 900
288
+ },
289
+ {
290
+ "epoch": 13.86861313868613,
291
+ "grad_norm": 0.06770322471857071,
292
+ "learning_rate": 0.0002467127499611136,
293
+ "loss": 0.4094,
294
+ "step": 950
295
+ },
296
+ {
297
+ "epoch": 14.598540145985401,
298
+ "grad_norm": 1.2612749338150024,
299
+ "learning_rate": 0.00023916150942626798,
300
+ "loss": 0.4188,
301
+ "step": 1000
302
+ },
303
+ {
304
+ "epoch": 14.598540145985401,
305
+ "eval_accuracy": 0.8315018315018315,
306
+ "eval_confusion_matrix": [
307
+ [
308
+ 60,
309
+ 9,
310
+ 2,
311
+ 4
312
+ ],
313
+ [
314
+ 8,
315
+ 56,
316
+ 11,
317
+ 0
318
+ ],
319
+ [
320
+ 1,
321
+ 11,
322
+ 50,
323
+ 0
324
+ ],
325
+ [
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 61
330
+ ]
331
+ ],
332
+ "eval_f1": 0.8307422385946511,
333
+ "eval_loss": 0.8534455299377441,
334
+ "eval_precision": 0.8312566016541674,
335
+ "eval_recall": 0.8315018315018315,
336
+ "eval_runtime": 3.796,
337
+ "eval_samples_per_second": 71.917,
338
+ "eval_steps_per_second": 0.79,
339
+ "step": 1000
340
+ },
341
+ {
342
+ "epoch": 15.328467153284672,
343
+ "grad_norm": 28.980899810791016,
344
+ "learning_rate": 0.0002312432864187738,
345
+ "loss": 0.3798,
346
+ "step": 1050
347
+ },
348
+ {
349
+ "epoch": 16.05839416058394,
350
+ "grad_norm": 0.022609323263168335,
351
+ "learning_rate": 0.0002229906717850284,
352
+ "loss": 0.3672,
353
+ "step": 1100
354
+ },
355
+ {
356
+ "epoch": 16.78832116788321,
357
+ "grad_norm": 0.02360348217189312,
358
+ "learning_rate": 0.00021443763270373483,
359
+ "loss": 0.3715,
360
+ "step": 1150
361
+ },
362
+ {
363
+ "epoch": 17.51824817518248,
364
+ "grad_norm": 0.014020542614161968,
365
+ "learning_rate": 0.0002056193728793941,
366
+ "loss": 0.349,
367
+ "step": 1200
368
+ },
369
+ {
370
+ "epoch": 17.51824817518248,
371
+ "eval_accuracy": 0.8351648351648352,
372
+ "eval_confusion_matrix": [
373
+ [
374
+ 62,
375
+ 10,
376
+ 1,
377
+ 2
378
+ ],
379
+ [
380
+ 9,
381
+ 57,
382
+ 9,
383
+ 0
384
+ ],
385
+ [
386
+ 2,
387
+ 12,
388
+ 48,
389
+ 0
390
+ ],
391
+ [
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 61
396
+ ]
397
+ ],
398
+ "eval_f1": 0.8350675728555914,
399
+ "eval_loss": 0.8131950497627258,
400
+ "eval_precision": 0.8358475863688551,
401
+ "eval_recall": 0.8351648351648352,
402
+ "eval_runtime": 3.7788,
403
+ "eval_samples_per_second": 72.246,
404
+ "eval_steps_per_second": 0.794,
405
+ "step": 1200
406
+ },
407
+ {
408
+ "epoch": 18.248175182481752,
409
+ "grad_norm": 0.006028232164680958,
410
+ "learning_rate": 0.0001965721876463452,
411
+ "loss": 0.3491,
412
+ "step": 1250
413
+ },
414
+ {
415
+ "epoch": 18.978102189781023,
416
+ "grad_norm": 0.008285734802484512,
417
+ "learning_rate": 0.00018733331457973358,
418
+ "loss": 0.3489,
419
+ "step": 1300
420
+ },
421
+ {
422
+ "epoch": 19.708029197080293,
423
+ "grad_norm": 0.008053851313889027,
424
+ "learning_rate": 0.00017794078022828275,
425
+ "loss": 0.3497,
426
+ "step": 1350
427
+ },
428
+ {
429
+ "epoch": 20.437956204379564,
430
+ "grad_norm": 0.003234422067180276,
431
+ "learning_rate": 0.00016843324359970712,
432
+ "loss": 0.3488,
433
+ "step": 1400
434
+ },
435
+ {
436
+ "epoch": 20.437956204379564,
437
+ "eval_accuracy": 0.8461538461538461,
438
+ "eval_confusion_matrix": [
439
+ [
440
+ 61,
441
+ 11,
442
+ 1,
443
+ 2
444
+ ],
445
+ [
446
+ 8,
447
+ 57,
448
+ 10,
449
+ 0
450
+ ],
451
+ [
452
+ 0,
453
+ 10,
454
+ 52,
455
+ 0
456
+ ],
457
+ [
458
+ 0,
459
+ 0,
460
+ 0,
461
+ 61
462
+ ]
463
+ ],
464
+ "eval_f1": 0.8462423027109934,
465
+ "eval_loss": 0.7859560251235962,
466
+ "eval_precision": 0.8474363933035696,
467
+ "eval_recall": 0.8461538461538461,
468
+ "eval_runtime": 3.7947,
469
+ "eval_samples_per_second": 71.942,
470
+ "eval_steps_per_second": 0.791,
471
+ "step": 1400
472
+ },
473
+ {
474
+ "epoch": 21.16788321167883,
475
+ "grad_norm": 0.004595920909196138,
476
+ "learning_rate": 0.00015884983704296757,
477
+ "loss": 0.3488,
478
+ "step": 1450
479
+ },
480
+ {
481
+ "epoch": 21.8978102189781,
482
+ "grad_norm": 0.002511706668883562,
483
+ "learning_rate": 0.00014923000518228847,
484
+ "loss": 0.3488,
485
+ "step": 1500
486
+ },
487
+ {
488
+ "epoch": 22.62773722627737,
489
+ "grad_norm": 0.002340014325454831,
490
+ "learning_rate": 0.00013961334256587125,
491
+ "loss": 0.3488,
492
+ "step": 1550
493
+ },
494
+ {
495
+ "epoch": 23.357664233576642,
496
+ "grad_norm": 0.0028287076856940985,
497
+ "learning_rate": 0.00013003943069753198,
498
+ "loss": 0.3488,
499
+ "step": 1600
500
+ },
501
+ {
502
+ "epoch": 23.357664233576642,
503
+ "eval_accuracy": 0.8461538461538461,
504
+ "eval_confusion_matrix": [
505
+ [
506
+ 61,
507
+ 11,
508
+ 1,
509
+ 2
510
+ ],
511
+ [
512
+ 8,
513
+ 57,
514
+ 10,
515
+ 0
516
+ ],
517
+ [
518
+ 0,
519
+ 10,
520
+ 52,
521
+ 0
522
+ ],
523
+ [
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 61
528
+ ]
529
+ ],
530
+ "eval_f1": 0.8462423027109934,
531
+ "eval_loss": 0.7856015563011169,
532
+ "eval_precision": 0.8474363933035696,
533
+ "eval_recall": 0.8461538461538461,
534
+ "eval_runtime": 3.7861,
535
+ "eval_samples_per_second": 72.105,
536
+ "eval_steps_per_second": 0.792,
537
+ "step": 1600
538
+ },
539
+ {
540
+ "epoch": 24.087591240875913,
541
+ "grad_norm": 0.0027960864827036858,
542
+ "learning_rate": 0.00012054767512202832,
543
+ "loss": 0.3488,
544
+ "step": 1650
545
+ },
546
+ {
547
+ "epoch": 24.817518248175183,
548
+ "grad_norm": 0.0033820979297161102,
549
+ "learning_rate": 0.00011117714323462186,
550
+ "loss": 0.3488,
551
+ "step": 1700
552
+ },
553
+ {
554
+ "epoch": 25.547445255474454,
555
+ "grad_norm": 0.0034969367552548647,
556
+ "learning_rate": 0.00010196640348243974,
557
+ "loss": 0.3488,
558
+ "step": 1750
559
+ },
560
+ {
561
+ "epoch": 26.277372262773724,
562
+ "grad_norm": 0.0014958898536860943,
563
+ "learning_rate": 9.295336661947115e-05,
564
+ "loss": 0.3488,
565
+ "step": 1800
566
+ },
567
+ {
568
+ "epoch": 26.277372262773724,
569
+ "eval_accuracy": 0.8534798534798534,
570
+ "eval_confusion_matrix": [
571
+ [
572
+ 61,
573
+ 11,
574
+ 1,
575
+ 2
576
+ ],
577
+ [
578
+ 7,
579
+ 58,
580
+ 10,
581
+ 0
582
+ ],
583
+ [
584
+ 0,
585
+ 9,
586
+ 53,
587
+ 0
588
+ ],
589
+ [
590
+ 0,
591
+ 0,
592
+ 0,
593
+ 61
594
+ ]
595
+ ],
596
+ "eval_f1": 0.8535663673078441,
597
+ "eval_loss": 0.7831193804740906,
598
+ "eval_precision": 0.8551497604301419,
599
+ "eval_recall": 0.8534798534798534,
600
+ "eval_runtime": 3.7976,
601
+ "eval_samples_per_second": 71.888,
602
+ "eval_steps_per_second": 0.79,
603
+ "step": 1800
604
+ },
605
+ {
606
+ "epoch": 27.00729927007299,
607
+ "grad_norm": 0.004900149069726467,
608
+ "learning_rate": 8.417512966858319e-05,
609
+ "loss": 0.3488,
610
+ "step": 1850
611
+ },
612
+ {
613
+ "epoch": 27.73722627737226,
614
+ "grad_norm": 0.0018804975552484393,
615
+ "learning_rate": 7.566782323279578e-05,
616
+ "loss": 0.3488,
617
+ "step": 1900
618
+ },
619
+ {
620
+ "epoch": 28.467153284671532,
621
+ "grad_norm": 0.0019178036600351334,
622
+ "learning_rate": 6.746646278427247e-05,
623
+ "loss": 0.3488,
624
+ "step": 1950
625
+ },
626
+ {
627
+ "epoch": 29.197080291970803,
628
+ "grad_norm": 0.001025234698317945,
629
+ "learning_rate": 5.960480454311155e-05,
630
+ "loss": 0.3488,
631
+ "step": 2000
632
+ },
633
+ {
634
+ "epoch": 29.197080291970803,
635
+ "eval_accuracy": 0.8498168498168498,
636
+ "eval_confusion_matrix": [
637
+ [
638
+ 61,
639
+ 11,
640
+ 1,
641
+ 2
642
+ ],
643
+ [
644
+ 8,
645
+ 57,
646
+ 10,
647
+ 0
648
+ ],
649
+ [
650
+ 0,
651
+ 9,
652
+ 53,
653
+ 0
654
+ ],
655
+ [
656
+ 0,
657
+ 0,
658
+ 0,
659
+ 61
660
+ ]
661
+ ],
662
+ "eval_f1": 0.8496942339108237,
663
+ "eval_loss": 0.7866398692131042,
664
+ "eval_precision": 0.8506632615716467,
665
+ "eval_recall": 0.8498168498168498,
666
+ "eval_runtime": 3.7892,
667
+ "eval_samples_per_second": 72.047,
668
+ "eval_steps_per_second": 0.792,
669
+ "step": 2000
670
+ }
671
+ ],
672
+ "logging_steps": 50,
673
+ "max_steps": 2720,
674
+ "num_input_tokens_seen": 0,
675
+ "num_train_epochs": 40,
676
+ "save_steps": 200,
677
+ "stateful_callbacks": {
678
+ "EarlyStoppingCallback": {
679
+ "args": {
680
+ "early_stopping_patience": 5,
681
+ "early_stopping_threshold": 0.001
682
+ },
683
+ "attributes": {
684
+ "early_stopping_patience_counter": 0
685
+ }
686
+ },
687
+ "TrainerControl": {
688
+ "args": {
689
+ "should_epoch_stop": false,
690
+ "should_evaluate": false,
691
+ "should_log": false,
692
+ "should_save": true,
693
+ "should_training_stop": false
694
+ },
695
+ "attributes": {}
696
+ }
697
+ },
698
+ "total_flos": 7.237953449856e+16,
699
+ "train_batch_size": 8,
700
+ "trial_name": null,
701
+ "trial_params": null
702
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cd6053c9e08edd46715ea6144f2b03c4f4ac31b547b8ae042d0b5e4a21ad64
3
+ size 5240
checkpoint-2200/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-2200/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3c2d5edd9e72d71a658460a72f7eade4b945fee18b8ed3e0894b518b5620f57
3
+ size 94765560
checkpoint-2200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fac40752229bb8e74f81bd6991f7eb8726e1b1ac23e5d515ff28f6b8eb48a9b
3
+ size 189556666
checkpoint-2200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:623badb81ceb5d9e1a453f79a7cb1eace1544e4c3d7187e9e4ee4efb585b685c
3
+ size 14308
checkpoint-2200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406a44cce484614116f6be37f7336e180742882a67b41259e3e9eabb076d0fd3
3
+ size 1064
checkpoint-2200/trainer_state.json ADDED
@@ -0,0 +1,768 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8535663673078441,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1800",
4
+ "epoch": 32.11678832116788,
5
+ "eval_steps": 200,
6
+ "global_step": 2200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7299270072992701,
13
+ "grad_norm": 2.289438486099243,
14
+ "learning_rate": 5.5147058823529414e-05,
15
+ "loss": 1.2878,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.4598540145985401,
20
+ "grad_norm": 2.5048491954803467,
21
+ "learning_rate": 0.00011029411764705883,
22
+ "loss": 0.8322,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 2.18978102189781,
27
+ "grad_norm": 11.18371295928955,
28
+ "learning_rate": 0.00016544117647058823,
29
+ "loss": 0.7897,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 2.9197080291970803,
34
+ "grad_norm": 9.702393531799316,
35
+ "learning_rate": 0.00022058823529411765,
36
+ "loss": 0.7149,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 2.9197080291970803,
41
+ "eval_accuracy": 0.7252747252747253,
42
+ "eval_confusion_matrix": [
43
+ [
44
+ 34,
45
+ 39,
46
+ 0,
47
+ 2
48
+ ],
49
+ [
50
+ 7,
51
+ 62,
52
+ 6,
53
+ 0
54
+ ],
55
+ [
56
+ 0,
57
+ 19,
58
+ 43,
59
+ 0
60
+ ],
61
+ [
62
+ 0,
63
+ 2,
64
+ 0,
65
+ 59
66
+ ]
67
+ ],
68
+ "eval_f1": 0.7260427659517454,
69
+ "eval_loss": 0.9058456420898438,
70
+ "eval_precision": 0.7828499608603893,
71
+ "eval_recall": 0.7252747252747253,
72
+ "eval_runtime": 3.7417,
73
+ "eval_samples_per_second": 72.962,
74
+ "eval_steps_per_second": 0.802,
75
+ "step": 200
76
+ },
77
+ {
78
+ "epoch": 3.6496350364963503,
79
+ "grad_norm": 8.47255802154541,
80
+ "learning_rate": 0.000275735294117647,
81
+ "loss": 0.6917,
82
+ "step": 250
83
+ },
84
+ {
85
+ "epoch": 4.37956204379562,
86
+ "grad_norm": 16.689321517944336,
87
+ "learning_rate": 0.0002999031705390845,
88
+ "loss": 0.7264,
89
+ "step": 300
90
+ },
91
+ {
92
+ "epoch": 5.109489051094891,
93
+ "grad_norm": 1.7369310855865479,
94
+ "learning_rate": 0.00029924913005299595,
95
+ "loss": 0.6895,
96
+ "step": 350
97
+ },
98
+ {
99
+ "epoch": 5.839416058394161,
100
+ "grad_norm": 2.210369348526001,
101
+ "learning_rate": 0.0002979807906935489,
102
+ "loss": 0.6939,
103
+ "step": 400
104
+ },
105
+ {
106
+ "epoch": 5.839416058394161,
107
+ "eval_accuracy": 0.7509157509157509,
108
+ "eval_confusion_matrix": [
109
+ [
110
+ 66,
111
+ 2,
112
+ 0,
113
+ 7
114
+ ],
115
+ [
116
+ 29,
117
+ 38,
118
+ 7,
119
+ 1
120
+ ],
121
+ [
122
+ 2,
123
+ 20,
124
+ 40,
125
+ 0
126
+ ],
127
+ [
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 61
132
+ ]
133
+ ],
134
+ "eval_f1": 0.7418721712792054,
135
+ "eval_loss": 0.8107791543006897,
136
+ "eval_precision": 0.7517378077426524,
137
+ "eval_recall": 0.7509157509157509,
138
+ "eval_runtime": 3.7702,
139
+ "eval_samples_per_second": 72.409,
140
+ "eval_steps_per_second": 0.796,
141
+ "step": 400
142
+ },
143
+ {
144
+ "epoch": 6.569343065693431,
145
+ "grad_norm": 2.1358511447906494,
146
+ "learning_rate": 0.000296103372855926,
147
+ "loss": 0.5986,
148
+ "step": 450
149
+ },
150
+ {
151
+ "epoch": 7.299270072992701,
152
+ "grad_norm": 13.704009056091309,
153
+ "learning_rate": 0.0002936246038592886,
154
+ "loss": 0.5932,
155
+ "step": 500
156
+ },
157
+ {
158
+ "epoch": 8.02919708029197,
159
+ "grad_norm": 2.032876968383789,
160
+ "learning_rate": 0.00029055468614167716,
161
+ "loss": 0.5633,
162
+ "step": 550
163
+ },
164
+ {
165
+ "epoch": 8.75912408759124,
166
+ "grad_norm": 28.525798797607422,
167
+ "learning_rate": 0.00028690625526749705,
168
+ "loss": 0.4941,
169
+ "step": 600
170
+ },
171
+ {
172
+ "epoch": 8.75912408759124,
173
+ "eval_accuracy": 0.8241758241758241,
174
+ "eval_confusion_matrix": [
175
+ [
176
+ 63,
177
+ 8,
178
+ 1,
179
+ 3
180
+ ],
181
+ [
182
+ 8,
183
+ 50,
184
+ 17,
185
+ 0
186
+ ],
187
+ [
188
+ 2,
189
+ 9,
190
+ 51,
191
+ 0
192
+ ],
193
+ [
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 61
198
+ ]
199
+ ],
200
+ "eval_f1": 0.8222676260809794,
201
+ "eval_loss": 0.7625077366828918,
202
+ "eval_precision": 0.8229409839103053,
203
+ "eval_recall": 0.8241758241758241,
204
+ "eval_runtime": 3.757,
205
+ "eval_samples_per_second": 72.664,
206
+ "eval_steps_per_second": 0.799,
207
+ "step": 600
208
+ },
209
+ {
210
+ "epoch": 9.489051094890511,
211
+ "grad_norm": 0.18371808528900146,
212
+ "learning_rate": 0.0002826943279204283,
213
+ "loss": 0.4842,
214
+ "step": 650
215
+ },
216
+ {
217
+ "epoch": 10.218978102189782,
218
+ "grad_norm": 11.426072120666504,
219
+ "learning_rate": 0.0002779362400958168,
220
+ "loss": 0.4352,
221
+ "step": 700
222
+ },
223
+ {
224
+ "epoch": 10.94890510948905,
225
+ "grad_norm": 8.062601089477539,
226
+ "learning_rate": 0.0002726515757469423,
227
+ "loss": 0.4447,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 11.678832116788321,
232
+ "grad_norm": 0.3985881805419922,
233
+ "learning_rate": 0.00026686208617885055,
234
+ "loss": 0.442,
235
+ "step": 800
236
+ },
237
+ {
238
+ "epoch": 11.678832116788321,
239
+ "eval_accuracy": 0.7985347985347986,
240
+ "eval_confusion_matrix": [
241
+ [
242
+ 66,
243
+ 6,
244
+ 1,
245
+ 2
246
+ ],
247
+ [
248
+ 15,
249
+ 32,
250
+ 26,
251
+ 2
252
+ ],
253
+ [
254
+ 2,
255
+ 1,
256
+ 59,
257
+ 0
258
+ ],
259
+ [
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 61
264
+ ]
265
+ ],
266
+ "eval_f1": 0.781170020153555,
267
+ "eval_loss": 0.9623217582702637,
268
+ "eval_precision": 0.8093701586901577,
269
+ "eval_recall": 0.7985347985347986,
270
+ "eval_runtime": 3.774,
271
+ "eval_samples_per_second": 72.337,
272
+ "eval_steps_per_second": 0.795,
273
+ "step": 800
274
+ },
275
+ {
276
+ "epoch": 12.408759124087592,
277
+ "grad_norm": 38.726985931396484,
278
+ "learning_rate": 0.0002605916005215186,
279
+ "loss": 0.4504,
280
+ "step": 850
281
+ },
282
+ {
283
+ "epoch": 13.138686131386862,
284
+ "grad_norm": 0.026563748717308044,
285
+ "learning_rate": 0.0002538659276508397,
286
+ "loss": 0.3903,
287
+ "step": 900
288
+ },
289
+ {
290
+ "epoch": 13.86861313868613,
291
+ "grad_norm": 0.06770322471857071,
292
+ "learning_rate": 0.0002467127499611136,
293
+ "loss": 0.4094,
294
+ "step": 950
295
+ },
296
+ {
297
+ "epoch": 14.598540145985401,
298
+ "grad_norm": 1.2612749338150024,
299
+ "learning_rate": 0.00023916150942626798,
300
+ "loss": 0.4188,
301
+ "step": 1000
302
+ },
303
+ {
304
+ "epoch": 14.598540145985401,
305
+ "eval_accuracy": 0.8315018315018315,
306
+ "eval_confusion_matrix": [
307
+ [
308
+ 60,
309
+ 9,
310
+ 2,
311
+ 4
312
+ ],
313
+ [
314
+ 8,
315
+ 56,
316
+ 11,
317
+ 0
318
+ ],
319
+ [
320
+ 1,
321
+ 11,
322
+ 50,
323
+ 0
324
+ ],
325
+ [
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 61
330
+ ]
331
+ ],
332
+ "eval_f1": 0.8307422385946511,
333
+ "eval_loss": 0.8534455299377441,
334
+ "eval_precision": 0.8312566016541674,
335
+ "eval_recall": 0.8315018315018315,
336
+ "eval_runtime": 3.796,
337
+ "eval_samples_per_second": 71.917,
338
+ "eval_steps_per_second": 0.79,
339
+ "step": 1000
340
+ },
341
+ {
342
+ "epoch": 15.328467153284672,
343
+ "grad_norm": 28.980899810791016,
344
+ "learning_rate": 0.0002312432864187738,
345
+ "loss": 0.3798,
346
+ "step": 1050
347
+ },
348
+ {
349
+ "epoch": 16.05839416058394,
350
+ "grad_norm": 0.022609323263168335,
351
+ "learning_rate": 0.0002229906717850284,
352
+ "loss": 0.3672,
353
+ "step": 1100
354
+ },
355
+ {
356
+ "epoch": 16.78832116788321,
357
+ "grad_norm": 0.02360348217189312,
358
+ "learning_rate": 0.00021443763270373483,
359
+ "loss": 0.3715,
360
+ "step": 1150
361
+ },
362
+ {
363
+ "epoch": 17.51824817518248,
364
+ "grad_norm": 0.014020542614161968,
365
+ "learning_rate": 0.0002056193728793941,
366
+ "loss": 0.349,
367
+ "step": 1200
368
+ },
369
+ {
370
+ "epoch": 17.51824817518248,
371
+ "eval_accuracy": 0.8351648351648352,
372
+ "eval_confusion_matrix": [
373
+ [
374
+ 62,
375
+ 10,
376
+ 1,
377
+ 2
378
+ ],
379
+ [
380
+ 9,
381
+ 57,
382
+ 9,
383
+ 0
384
+ ],
385
+ [
386
+ 2,
387
+ 12,
388
+ 48,
389
+ 0
390
+ ],
391
+ [
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 61
396
+ ]
397
+ ],
398
+ "eval_f1": 0.8350675728555914,
399
+ "eval_loss": 0.8131950497627258,
400
+ "eval_precision": 0.8358475863688551,
401
+ "eval_recall": 0.8351648351648352,
402
+ "eval_runtime": 3.7788,
403
+ "eval_samples_per_second": 72.246,
404
+ "eval_steps_per_second": 0.794,
405
+ "step": 1200
406
+ },
407
+ {
408
+ "epoch": 18.248175182481752,
409
+ "grad_norm": 0.006028232164680958,
410
+ "learning_rate": 0.0001965721876463452,
411
+ "loss": 0.3491,
412
+ "step": 1250
413
+ },
414
+ {
415
+ "epoch": 18.978102189781023,
416
+ "grad_norm": 0.008285734802484512,
417
+ "learning_rate": 0.00018733331457973358,
418
+ "loss": 0.3489,
419
+ "step": 1300
420
+ },
421
+ {
422
+ "epoch": 19.708029197080293,
423
+ "grad_norm": 0.008053851313889027,
424
+ "learning_rate": 0.00017794078022828275,
425
+ "loss": 0.3497,
426
+ "step": 1350
427
+ },
428
+ {
429
+ "epoch": 20.437956204379564,
430
+ "grad_norm": 0.003234422067180276,
431
+ "learning_rate": 0.00016843324359970712,
432
+ "loss": 0.3488,
433
+ "step": 1400
434
+ },
435
+ {
436
+ "epoch": 20.437956204379564,
437
+ "eval_accuracy": 0.8461538461538461,
438
+ "eval_confusion_matrix": [
439
+ [
440
+ 61,
441
+ 11,
442
+ 1,
443
+ 2
444
+ ],
445
+ [
446
+ 8,
447
+ 57,
448
+ 10,
449
+ 0
450
+ ],
451
+ [
452
+ 0,
453
+ 10,
454
+ 52,
455
+ 0
456
+ ],
457
+ [
458
+ 0,
459
+ 0,
460
+ 0,
461
+ 61
462
+ ]
463
+ ],
464
+ "eval_f1": 0.8462423027109934,
465
+ "eval_loss": 0.7859560251235962,
466
+ "eval_precision": 0.8474363933035696,
467
+ "eval_recall": 0.8461538461538461,
468
+ "eval_runtime": 3.7947,
469
+ "eval_samples_per_second": 71.942,
470
+ "eval_steps_per_second": 0.791,
471
+ "step": 1400
472
+ },
473
+ {
474
+ "epoch": 21.16788321167883,
475
+ "grad_norm": 0.004595920909196138,
476
+ "learning_rate": 0.00015884983704296757,
477
+ "loss": 0.3488,
478
+ "step": 1450
479
+ },
480
+ {
481
+ "epoch": 21.8978102189781,
482
+ "grad_norm": 0.002511706668883562,
483
+ "learning_rate": 0.00014923000518228847,
484
+ "loss": 0.3488,
485
+ "step": 1500
486
+ },
487
+ {
488
+ "epoch": 22.62773722627737,
489
+ "grad_norm": 0.002340014325454831,
490
+ "learning_rate": 0.00013961334256587125,
491
+ "loss": 0.3488,
492
+ "step": 1550
493
+ },
494
+ {
495
+ "epoch": 23.357664233576642,
496
+ "grad_norm": 0.0028287076856940985,
497
+ "learning_rate": 0.00013003943069753198,
498
+ "loss": 0.3488,
499
+ "step": 1600
500
+ },
501
+ {
502
+ "epoch": 23.357664233576642,
503
+ "eval_accuracy": 0.8461538461538461,
504
+ "eval_confusion_matrix": [
505
+ [
506
+ 61,
507
+ 11,
508
+ 1,
509
+ 2
510
+ ],
511
+ [
512
+ 8,
513
+ 57,
514
+ 10,
515
+ 0
516
+ ],
517
+ [
518
+ 0,
519
+ 10,
520
+ 52,
521
+ 0
522
+ ],
523
+ [
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 61
528
+ ]
529
+ ],
530
+ "eval_f1": 0.8462423027109934,
531
+ "eval_loss": 0.7856015563011169,
532
+ "eval_precision": 0.8474363933035696,
533
+ "eval_recall": 0.8461538461538461,
534
+ "eval_runtime": 3.7861,
535
+ "eval_samples_per_second": 72.105,
536
+ "eval_steps_per_second": 0.792,
537
+ "step": 1600
538
+ },
539
+ {
540
+ "epoch": 24.087591240875913,
541
+ "grad_norm": 0.0027960864827036858,
542
+ "learning_rate": 0.00012054767512202832,
543
+ "loss": 0.3488,
544
+ "step": 1650
545
+ },
546
+ {
547
+ "epoch": 24.817518248175183,
548
+ "grad_norm": 0.0033820979297161102,
549
+ "learning_rate": 0.00011117714323462186,
550
+ "loss": 0.3488,
551
+ "step": 1700
552
+ },
553
+ {
554
+ "epoch": 25.547445255474454,
555
+ "grad_norm": 0.0034969367552548647,
556
+ "learning_rate": 0.00010196640348243974,
557
+ "loss": 0.3488,
558
+ "step": 1750
559
+ },
560
+ {
561
+ "epoch": 26.277372262773724,
562
+ "grad_norm": 0.0014958898536860943,
563
+ "learning_rate": 9.295336661947115e-05,
564
+ "loss": 0.3488,
565
+ "step": 1800
566
+ },
567
+ {
568
+ "epoch": 26.277372262773724,
569
+ "eval_accuracy": 0.8534798534798534,
570
+ "eval_confusion_matrix": [
571
+ [
572
+ 61,
573
+ 11,
574
+ 1,
575
+ 2
576
+ ],
577
+ [
578
+ 7,
579
+ 58,
580
+ 10,
581
+ 0
582
+ ],
583
+ [
584
+ 0,
585
+ 9,
586
+ 53,
587
+ 0
588
+ ],
589
+ [
590
+ 0,
591
+ 0,
592
+ 0,
593
+ 61
594
+ ]
595
+ ],
596
+ "eval_f1": 0.8535663673078441,
597
+ "eval_loss": 0.7831193804740906,
598
+ "eval_precision": 0.8551497604301419,
599
+ "eval_recall": 0.8534798534798534,
600
+ "eval_runtime": 3.7976,
601
+ "eval_samples_per_second": 71.888,
602
+ "eval_steps_per_second": 0.79,
603
+ "step": 1800
604
+ },
605
+ {
606
+ "epoch": 27.00729927007299,
607
+ "grad_norm": 0.004900149069726467,
608
+ "learning_rate": 8.417512966858319e-05,
609
+ "loss": 0.3488,
610
+ "step": 1850
611
+ },
612
+ {
613
+ "epoch": 27.73722627737226,
614
+ "grad_norm": 0.0018804975552484393,
615
+ "learning_rate": 7.566782323279578e-05,
616
+ "loss": 0.3488,
617
+ "step": 1900
618
+ },
619
+ {
620
+ "epoch": 28.467153284671532,
621
+ "grad_norm": 0.0019178036600351334,
622
+ "learning_rate": 6.746646278427247e-05,
623
+ "loss": 0.3488,
624
+ "step": 1950
625
+ },
626
+ {
627
+ "epoch": 29.197080291970803,
628
+ "grad_norm": 0.001025234698317945,
629
+ "learning_rate": 5.960480454311155e-05,
630
+ "loss": 0.3488,
631
+ "step": 2000
632
+ },
633
+ {
634
+ "epoch": 29.197080291970803,
635
+ "eval_accuracy": 0.8498168498168498,
636
+ "eval_confusion_matrix": [
637
+ [
638
+ 61,
639
+ 11,
640
+ 1,
641
+ 2
642
+ ],
643
+ [
644
+ 8,
645
+ 57,
646
+ 10,
647
+ 0
648
+ ],
649
+ [
650
+ 0,
651
+ 9,
652
+ 53,
653
+ 0
654
+ ],
655
+ [
656
+ 0,
657
+ 0,
658
+ 0,
659
+ 61
660
+ ]
661
+ ],
662
+ "eval_f1": 0.8496942339108237,
663
+ "eval_loss": 0.7866398692131042,
664
+ "eval_precision": 0.8506632615716467,
665
+ "eval_recall": 0.8498168498168498,
666
+ "eval_runtime": 3.7892,
667
+ "eval_samples_per_second": 72.047,
668
+ "eval_steps_per_second": 0.792,
669
+ "step": 2000
670
+ },
671
+ {
672
+ "epoch": 29.927007299270073,
673
+ "grad_norm": 0.0027674695011228323,
674
+ "learning_rate": 5.2115206539129e-05,
675
+ "loss": 0.3488,
676
+ "step": 2050
677
+ },
678
+ {
679
+ "epoch": 30.656934306569344,
680
+ "grad_norm": 0.0016269112238660455,
681
+ "learning_rate": 4.5028495428494483e-05,
682
+ "loss": 0.3488,
683
+ "step": 2100
684
+ },
685
+ {
686
+ "epoch": 31.386861313868614,
687
+ "grad_norm": 0.0019462064374238253,
688
+ "learning_rate": 3.837383961339246e-05,
689
+ "loss": 0.3488,
690
+ "step": 2150
691
+ },
692
+ {
693
+ "epoch": 32.11678832116788,
694
+ "grad_norm": 0.0011992512736469507,
695
+ "learning_rate": 3.21786291869402e-05,
696
+ "loss": 0.3488,
697
+ "step": 2200
698
+ },
699
+ {
700
+ "epoch": 32.11678832116788,
701
+ "eval_accuracy": 0.8534798534798534,
702
+ "eval_confusion_matrix": [
703
+ [
704
+ 61,
705
+ 11,
706
+ 1,
707
+ 2
708
+ ],
709
+ [
710
+ 8,
711
+ 57,
712
+ 10,
713
+ 0
714
+ ],
715
+ [
716
+ 0,
717
+ 8,
718
+ 54,
719
+ 0
720
+ ],
721
+ [
722
+ 0,
723
+ 0,
724
+ 0,
725
+ 61
726
+ ]
727
+ ],
728
+ "eval_f1": 0.8531308487327289,
729
+ "eval_loss": 0.7856839895248413,
730
+ "eval_precision": 0.8539396783782831,
731
+ "eval_recall": 0.8534798534798534,
732
+ "eval_runtime": 3.787,
733
+ "eval_samples_per_second": 72.088,
734
+ "eval_steps_per_second": 0.792,
735
+ "step": 2200
736
+ }
737
+ ],
738
+ "logging_steps": 50,
739
+ "max_steps": 2720,
740
+ "num_input_tokens_seen": 0,
741
+ "num_train_epochs": 40,
742
+ "save_steps": 200,
743
+ "stateful_callbacks": {
744
+ "EarlyStoppingCallback": {
745
+ "args": {
746
+ "early_stopping_patience": 5,
747
+ "early_stopping_threshold": 0.001
748
+ },
749
+ "attributes": {
750
+ "early_stopping_patience_counter": 0
751
+ }
752
+ },
753
+ "TrainerControl": {
754
+ "args": {
755
+ "should_epoch_stop": false,
756
+ "should_evaluate": false,
757
+ "should_log": false,
758
+ "should_save": true,
759
+ "should_training_stop": false
760
+ },
761
+ "attributes": {}
762
+ }
763
+ },
764
+ "total_flos": 7.961612341248e+16,
765
+ "train_batch_size": 8,
766
+ "trial_name": null,
767
+ "trial_params": null
768
+ }
checkpoint-2200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cd6053c9e08edd46715ea6144f2b03c4f4ac31b547b8ae042d0b5e4a21ad64
3
+ size 5240
checkpoint-2400/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-2400/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f6f9165a112c1c9f627b872ddba5c6f189bc27160f4e0df85281aa54151f930
3
+ size 94765560
checkpoint-2400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f151b348df4c0110eb4960671482caf90d968f80a4844aaea959dec4e29802fa
3
+ size 189556666
checkpoint-2400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c6c2284c30ed80bf04e4a7222520c39000a8d449d6494c1393520378ed2c60
3
+ size 14308
checkpoint-2400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2488c60c13a7ef6f9d44255069440fc1dd24d07b71e6efb75a3ad44fe512eaf8
3
+ size 1064
checkpoint-2400/trainer_state.json ADDED
@@ -0,0 +1,834 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8535663673078441,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-1800",
4
+ "epoch": 35.03649635036496,
5
+ "eval_steps": 200,
6
+ "global_step": 2400,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7299270072992701,
13
+ "grad_norm": 2.289438486099243,
14
+ "learning_rate": 5.5147058823529414e-05,
15
+ "loss": 1.2878,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.4598540145985401,
20
+ "grad_norm": 2.5048491954803467,
21
+ "learning_rate": 0.00011029411764705883,
22
+ "loss": 0.8322,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 2.18978102189781,
27
+ "grad_norm": 11.18371295928955,
28
+ "learning_rate": 0.00016544117647058823,
29
+ "loss": 0.7897,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 2.9197080291970803,
34
+ "grad_norm": 9.702393531799316,
35
+ "learning_rate": 0.00022058823529411765,
36
+ "loss": 0.7149,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 2.9197080291970803,
41
+ "eval_accuracy": 0.7252747252747253,
42
+ "eval_confusion_matrix": [
43
+ [
44
+ 34,
45
+ 39,
46
+ 0,
47
+ 2
48
+ ],
49
+ [
50
+ 7,
51
+ 62,
52
+ 6,
53
+ 0
54
+ ],
55
+ [
56
+ 0,
57
+ 19,
58
+ 43,
59
+ 0
60
+ ],
61
+ [
62
+ 0,
63
+ 2,
64
+ 0,
65
+ 59
66
+ ]
67
+ ],
68
+ "eval_f1": 0.7260427659517454,
69
+ "eval_loss": 0.9058456420898438,
70
+ "eval_precision": 0.7828499608603893,
71
+ "eval_recall": 0.7252747252747253,
72
+ "eval_runtime": 3.7417,
73
+ "eval_samples_per_second": 72.962,
74
+ "eval_steps_per_second": 0.802,
75
+ "step": 200
76
+ },
77
+ {
78
+ "epoch": 3.6496350364963503,
79
+ "grad_norm": 8.47255802154541,
80
+ "learning_rate": 0.000275735294117647,
81
+ "loss": 0.6917,
82
+ "step": 250
83
+ },
84
+ {
85
+ "epoch": 4.37956204379562,
86
+ "grad_norm": 16.689321517944336,
87
+ "learning_rate": 0.0002999031705390845,
88
+ "loss": 0.7264,
89
+ "step": 300
90
+ },
91
+ {
92
+ "epoch": 5.109489051094891,
93
+ "grad_norm": 1.7369310855865479,
94
+ "learning_rate": 0.00029924913005299595,
95
+ "loss": 0.6895,
96
+ "step": 350
97
+ },
98
+ {
99
+ "epoch": 5.839416058394161,
100
+ "grad_norm": 2.210369348526001,
101
+ "learning_rate": 0.0002979807906935489,
102
+ "loss": 0.6939,
103
+ "step": 400
104
+ },
105
+ {
106
+ "epoch": 5.839416058394161,
107
+ "eval_accuracy": 0.7509157509157509,
108
+ "eval_confusion_matrix": [
109
+ [
110
+ 66,
111
+ 2,
112
+ 0,
113
+ 7
114
+ ],
115
+ [
116
+ 29,
117
+ 38,
118
+ 7,
119
+ 1
120
+ ],
121
+ [
122
+ 2,
123
+ 20,
124
+ 40,
125
+ 0
126
+ ],
127
+ [
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 61
132
+ ]
133
+ ],
134
+ "eval_f1": 0.7418721712792054,
135
+ "eval_loss": 0.8107791543006897,
136
+ "eval_precision": 0.7517378077426524,
137
+ "eval_recall": 0.7509157509157509,
138
+ "eval_runtime": 3.7702,
139
+ "eval_samples_per_second": 72.409,
140
+ "eval_steps_per_second": 0.796,
141
+ "step": 400
142
+ },
143
+ {
144
+ "epoch": 6.569343065693431,
145
+ "grad_norm": 2.1358511447906494,
146
+ "learning_rate": 0.000296103372855926,
147
+ "loss": 0.5986,
148
+ "step": 450
149
+ },
150
+ {
151
+ "epoch": 7.299270072992701,
152
+ "grad_norm": 13.704009056091309,
153
+ "learning_rate": 0.0002936246038592886,
154
+ "loss": 0.5932,
155
+ "step": 500
156
+ },
157
+ {
158
+ "epoch": 8.02919708029197,
159
+ "grad_norm": 2.032876968383789,
160
+ "learning_rate": 0.00029055468614167716,
161
+ "loss": 0.5633,
162
+ "step": 550
163
+ },
164
+ {
165
+ "epoch": 8.75912408759124,
166
+ "grad_norm": 28.525798797607422,
167
+ "learning_rate": 0.00028690625526749705,
168
+ "loss": 0.4941,
169
+ "step": 600
170
+ },
171
+ {
172
+ "epoch": 8.75912408759124,
173
+ "eval_accuracy": 0.8241758241758241,
174
+ "eval_confusion_matrix": [
175
+ [
176
+ 63,
177
+ 8,
178
+ 1,
179
+ 3
180
+ ],
181
+ [
182
+ 8,
183
+ 50,
184
+ 17,
185
+ 0
186
+ ],
187
+ [
188
+ 2,
189
+ 9,
190
+ 51,
191
+ 0
192
+ ],
193
+ [
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 61
198
+ ]
199
+ ],
200
+ "eval_f1": 0.8222676260809794,
201
+ "eval_loss": 0.7625077366828918,
202
+ "eval_precision": 0.8229409839103053,
203
+ "eval_recall": 0.8241758241758241,
204
+ "eval_runtime": 3.757,
205
+ "eval_samples_per_second": 72.664,
206
+ "eval_steps_per_second": 0.799,
207
+ "step": 600
208
+ },
209
+ {
210
+ "epoch": 9.489051094890511,
211
+ "grad_norm": 0.18371808528900146,
212
+ "learning_rate": 0.0002826943279204283,
213
+ "loss": 0.4842,
214
+ "step": 650
215
+ },
216
+ {
217
+ "epoch": 10.218978102189782,
218
+ "grad_norm": 11.426072120666504,
219
+ "learning_rate": 0.0002779362400958168,
220
+ "loss": 0.4352,
221
+ "step": 700
222
+ },
223
+ {
224
+ "epoch": 10.94890510948905,
225
+ "grad_norm": 8.062601089477539,
226
+ "learning_rate": 0.0002726515757469423,
227
+ "loss": 0.4447,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 11.678832116788321,
232
+ "grad_norm": 0.3985881805419922,
233
+ "learning_rate": 0.00026686208617885055,
234
+ "loss": 0.442,
235
+ "step": 800
236
+ },
237
+ {
238
+ "epoch": 11.678832116788321,
239
+ "eval_accuracy": 0.7985347985347986,
240
+ "eval_confusion_matrix": [
241
+ [
242
+ 66,
243
+ 6,
244
+ 1,
245
+ 2
246
+ ],
247
+ [
248
+ 15,
249
+ 32,
250
+ 26,
251
+ 2
252
+ ],
253
+ [
254
+ 2,
255
+ 1,
256
+ 59,
257
+ 0
258
+ ],
259
+ [
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 61
264
+ ]
265
+ ],
266
+ "eval_f1": 0.781170020153555,
267
+ "eval_loss": 0.9623217582702637,
268
+ "eval_precision": 0.8093701586901577,
269
+ "eval_recall": 0.7985347985347986,
270
+ "eval_runtime": 3.774,
271
+ "eval_samples_per_second": 72.337,
272
+ "eval_steps_per_second": 0.795,
273
+ "step": 800
274
+ },
275
+ {
276
+ "epoch": 12.408759124087592,
277
+ "grad_norm": 38.726985931396484,
278
+ "learning_rate": 0.0002605916005215186,
279
+ "loss": 0.4504,
280
+ "step": 850
281
+ },
282
+ {
283
+ "epoch": 13.138686131386862,
284
+ "grad_norm": 0.026563748717308044,
285
+ "learning_rate": 0.0002538659276508397,
286
+ "loss": 0.3903,
287
+ "step": 900
288
+ },
289
+ {
290
+ "epoch": 13.86861313868613,
291
+ "grad_norm": 0.06770322471857071,
292
+ "learning_rate": 0.0002467127499611136,
293
+ "loss": 0.4094,
294
+ "step": 950
295
+ },
296
+ {
297
+ "epoch": 14.598540145985401,
298
+ "grad_norm": 1.2612749338150024,
299
+ "learning_rate": 0.00023916150942626798,
300
+ "loss": 0.4188,
301
+ "step": 1000
302
+ },
303
+ {
304
+ "epoch": 14.598540145985401,
305
+ "eval_accuracy": 0.8315018315018315,
306
+ "eval_confusion_matrix": [
307
+ [
308
+ 60,
309
+ 9,
310
+ 2,
311
+ 4
312
+ ],
313
+ [
314
+ 8,
315
+ 56,
316
+ 11,
317
+ 0
318
+ ],
319
+ [
320
+ 1,
321
+ 11,
322
+ 50,
323
+ 0
324
+ ],
325
+ [
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 61
330
+ ]
331
+ ],
332
+ "eval_f1": 0.8307422385946511,
333
+ "eval_loss": 0.8534455299377441,
334
+ "eval_precision": 0.8312566016541674,
335
+ "eval_recall": 0.8315018315018315,
336
+ "eval_runtime": 3.796,
337
+ "eval_samples_per_second": 71.917,
338
+ "eval_steps_per_second": 0.79,
339
+ "step": 1000
340
+ },
341
+ {
342
+ "epoch": 15.328467153284672,
343
+ "grad_norm": 28.980899810791016,
344
+ "learning_rate": 0.0002312432864187738,
345
+ "loss": 0.3798,
346
+ "step": 1050
347
+ },
348
+ {
349
+ "epoch": 16.05839416058394,
350
+ "grad_norm": 0.022609323263168335,
351
+ "learning_rate": 0.0002229906717850284,
352
+ "loss": 0.3672,
353
+ "step": 1100
354
+ },
355
+ {
356
+ "epoch": 16.78832116788321,
357
+ "grad_norm": 0.02360348217189312,
358
+ "learning_rate": 0.00021443763270373483,
359
+ "loss": 0.3715,
360
+ "step": 1150
361
+ },
362
+ {
363
+ "epoch": 17.51824817518248,
364
+ "grad_norm": 0.014020542614161968,
365
+ "learning_rate": 0.0002056193728793941,
366
+ "loss": 0.349,
367
+ "step": 1200
368
+ },
369
+ {
370
+ "epoch": 17.51824817518248,
371
+ "eval_accuracy": 0.8351648351648352,
372
+ "eval_confusion_matrix": [
373
+ [
374
+ 62,
375
+ 10,
376
+ 1,
377
+ 2
378
+ ],
379
+ [
380
+ 9,
381
+ 57,
382
+ 9,
383
+ 0
384
+ ],
385
+ [
386
+ 2,
387
+ 12,
388
+ 48,
389
+ 0
390
+ ],
391
+ [
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 61
396
+ ]
397
+ ],
398
+ "eval_f1": 0.8350675728555914,
399
+ "eval_loss": 0.8131950497627258,
400
+ "eval_precision": 0.8358475863688551,
401
+ "eval_recall": 0.8351648351648352,
402
+ "eval_runtime": 3.7788,
403
+ "eval_samples_per_second": 72.246,
404
+ "eval_steps_per_second": 0.794,
405
+ "step": 1200
406
+ },
407
+ {
408
+ "epoch": 18.248175182481752,
409
+ "grad_norm": 0.006028232164680958,
410
+ "learning_rate": 0.0001965721876463452,
411
+ "loss": 0.3491,
412
+ "step": 1250
413
+ },
414
+ {
415
+ "epoch": 18.978102189781023,
416
+ "grad_norm": 0.008285734802484512,
417
+ "learning_rate": 0.00018733331457973358,
418
+ "loss": 0.3489,
419
+ "step": 1300
420
+ },
421
+ {
422
+ "epoch": 19.708029197080293,
423
+ "grad_norm": 0.008053851313889027,
424
+ "learning_rate": 0.00017794078022828275,
425
+ "loss": 0.3497,
426
+ "step": 1350
427
+ },
428
+ {
429
+ "epoch": 20.437956204379564,
430
+ "grad_norm": 0.003234422067180276,
431
+ "learning_rate": 0.00016843324359970712,
432
+ "loss": 0.3488,
433
+ "step": 1400
434
+ },
435
+ {
436
+ "epoch": 20.437956204379564,
437
+ "eval_accuracy": 0.8461538461538461,
438
+ "eval_confusion_matrix": [
439
+ [
440
+ 61,
441
+ 11,
442
+ 1,
443
+ 2
444
+ ],
445
+ [
446
+ 8,
447
+ 57,
448
+ 10,
449
+ 0
450
+ ],
451
+ [
452
+ 0,
453
+ 10,
454
+ 52,
455
+ 0
456
+ ],
457
+ [
458
+ 0,
459
+ 0,
460
+ 0,
461
+ 61
462
+ ]
463
+ ],
464
+ "eval_f1": 0.8462423027109934,
465
+ "eval_loss": 0.7859560251235962,
466
+ "eval_precision": 0.8474363933035696,
467
+ "eval_recall": 0.8461538461538461,
468
+ "eval_runtime": 3.7947,
469
+ "eval_samples_per_second": 71.942,
470
+ "eval_steps_per_second": 0.791,
471
+ "step": 1400
472
+ },
473
+ {
474
+ "epoch": 21.16788321167883,
475
+ "grad_norm": 0.004595920909196138,
476
+ "learning_rate": 0.00015884983704296757,
477
+ "loss": 0.3488,
478
+ "step": 1450
479
+ },
480
+ {
481
+ "epoch": 21.8978102189781,
482
+ "grad_norm": 0.002511706668883562,
483
+ "learning_rate": 0.00014923000518228847,
484
+ "loss": 0.3488,
485
+ "step": 1500
486
+ },
487
+ {
488
+ "epoch": 22.62773722627737,
489
+ "grad_norm": 0.002340014325454831,
490
+ "learning_rate": 0.00013961334256587125,
491
+ "loss": 0.3488,
492
+ "step": 1550
493
+ },
494
+ {
495
+ "epoch": 23.357664233576642,
496
+ "grad_norm": 0.0028287076856940985,
497
+ "learning_rate": 0.00013003943069753198,
498
+ "loss": 0.3488,
499
+ "step": 1600
500
+ },
501
+ {
502
+ "epoch": 23.357664233576642,
503
+ "eval_accuracy": 0.8461538461538461,
504
+ "eval_confusion_matrix": [
505
+ [
506
+ 61,
507
+ 11,
508
+ 1,
509
+ 2
510
+ ],
511
+ [
512
+ 8,
513
+ 57,
514
+ 10,
515
+ 0
516
+ ],
517
+ [
518
+ 0,
519
+ 10,
520
+ 52,
521
+ 0
522
+ ],
523
+ [
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 61
528
+ ]
529
+ ],
530
+ "eval_f1": 0.8462423027109934,
531
+ "eval_loss": 0.7856015563011169,
532
+ "eval_precision": 0.8474363933035696,
533
+ "eval_recall": 0.8461538461538461,
534
+ "eval_runtime": 3.7861,
535
+ "eval_samples_per_second": 72.105,
536
+ "eval_steps_per_second": 0.792,
537
+ "step": 1600
538
+ },
539
+ {
540
+ "epoch": 24.087591240875913,
541
+ "grad_norm": 0.0027960864827036858,
542
+ "learning_rate": 0.00012054767512202832,
543
+ "loss": 0.3488,
544
+ "step": 1650
545
+ },
546
+ {
547
+ "epoch": 24.817518248175183,
548
+ "grad_norm": 0.0033820979297161102,
549
+ "learning_rate": 0.00011117714323462186,
550
+ "loss": 0.3488,
551
+ "step": 1700
552
+ },
553
+ {
554
+ "epoch": 25.547445255474454,
555
+ "grad_norm": 0.0034969367552548647,
556
+ "learning_rate": 0.00010196640348243974,
557
+ "loss": 0.3488,
558
+ "step": 1750
559
+ },
560
+ {
561
+ "epoch": 26.277372262773724,
562
+ "grad_norm": 0.0014958898536860943,
563
+ "learning_rate": 9.295336661947115e-05,
564
+ "loss": 0.3488,
565
+ "step": 1800
566
+ },
567
+ {
568
+ "epoch": 26.277372262773724,
569
+ "eval_accuracy": 0.8534798534798534,
570
+ "eval_confusion_matrix": [
571
+ [
572
+ 61,
573
+ 11,
574
+ 1,
575
+ 2
576
+ ],
577
+ [
578
+ 7,
579
+ 58,
580
+ 10,
581
+ 0
582
+ ],
583
+ [
584
+ 0,
585
+ 9,
586
+ 53,
587
+ 0
588
+ ],
589
+ [
590
+ 0,
591
+ 0,
592
+ 0,
593
+ 61
594
+ ]
595
+ ],
596
+ "eval_f1": 0.8535663673078441,
597
+ "eval_loss": 0.7831193804740906,
598
+ "eval_precision": 0.8551497604301419,
599
+ "eval_recall": 0.8534798534798534,
600
+ "eval_runtime": 3.7976,
601
+ "eval_samples_per_second": 71.888,
602
+ "eval_steps_per_second": 0.79,
603
+ "step": 1800
604
+ },
605
+ {
606
+ "epoch": 27.00729927007299,
607
+ "grad_norm": 0.004900149069726467,
608
+ "learning_rate": 8.417512966858319e-05,
609
+ "loss": 0.3488,
610
+ "step": 1850
611
+ },
612
+ {
613
+ "epoch": 27.73722627737226,
614
+ "grad_norm": 0.0018804975552484393,
615
+ "learning_rate": 7.566782323279578e-05,
616
+ "loss": 0.3488,
617
+ "step": 1900
618
+ },
619
+ {
620
+ "epoch": 28.467153284671532,
621
+ "grad_norm": 0.0019178036600351334,
622
+ "learning_rate": 6.746646278427247e-05,
623
+ "loss": 0.3488,
624
+ "step": 1950
625
+ },
626
+ {
627
+ "epoch": 29.197080291970803,
628
+ "grad_norm": 0.001025234698317945,
629
+ "learning_rate": 5.960480454311155e-05,
630
+ "loss": 0.3488,
631
+ "step": 2000
632
+ },
633
+ {
634
+ "epoch": 29.197080291970803,
635
+ "eval_accuracy": 0.8498168498168498,
636
+ "eval_confusion_matrix": [
637
+ [
638
+ 61,
639
+ 11,
640
+ 1,
641
+ 2
642
+ ],
643
+ [
644
+ 8,
645
+ 57,
646
+ 10,
647
+ 0
648
+ ],
649
+ [
650
+ 0,
651
+ 9,
652
+ 53,
653
+ 0
654
+ ],
655
+ [
656
+ 0,
657
+ 0,
658
+ 0,
659
+ 61
660
+ ]
661
+ ],
662
+ "eval_f1": 0.8496942339108237,
663
+ "eval_loss": 0.7866398692131042,
664
+ "eval_precision": 0.8506632615716467,
665
+ "eval_recall": 0.8498168498168498,
666
+ "eval_runtime": 3.7892,
667
+ "eval_samples_per_second": 72.047,
668
+ "eval_steps_per_second": 0.792,
669
+ "step": 2000
670
+ },
671
+ {
672
+ "epoch": 29.927007299270073,
673
+ "grad_norm": 0.0027674695011228323,
674
+ "learning_rate": 5.2115206539129e-05,
675
+ "loss": 0.3488,
676
+ "step": 2050
677
+ },
678
+ {
679
+ "epoch": 30.656934306569344,
680
+ "grad_norm": 0.0016269112238660455,
681
+ "learning_rate": 4.5028495428494483e-05,
682
+ "loss": 0.3488,
683
+ "step": 2100
684
+ },
685
+ {
686
+ "epoch": 31.386861313868614,
687
+ "grad_norm": 0.0019462064374238253,
688
+ "learning_rate": 3.837383961339246e-05,
689
+ "loss": 0.3488,
690
+ "step": 2150
691
+ },
692
+ {
693
+ "epoch": 32.11678832116788,
694
+ "grad_norm": 0.0011992512736469507,
695
+ "learning_rate": 3.21786291869402e-05,
696
+ "loss": 0.3488,
697
+ "step": 2200
698
+ },
699
+ {
700
+ "epoch": 32.11678832116788,
701
+ "eval_accuracy": 0.8534798534798534,
702
+ "eval_confusion_matrix": [
703
+ [
704
+ 61,
705
+ 11,
706
+ 1,
707
+ 2
708
+ ],
709
+ [
710
+ 8,
711
+ 57,
712
+ 10,
713
+ 0
714
+ ],
715
+ [
716
+ 0,
717
+ 8,
718
+ 54,
719
+ 0
720
+ ],
721
+ [
722
+ 0,
723
+ 0,
724
+ 0,
725
+ 61
726
+ ]
727
+ ],
728
+ "eval_f1": 0.8531308487327289,
729
+ "eval_loss": 0.7856839895248413,
730
+ "eval_precision": 0.8539396783782831,
731
+ "eval_recall": 0.8534798534798534,
732
+ "eval_runtime": 3.787,
733
+ "eval_samples_per_second": 72.088,
734
+ "eval_steps_per_second": 0.792,
735
+ "step": 2200
736
+ },
737
+ {
738
+ "epoch": 32.846715328467155,
739
+ "grad_norm": 0.0029719627927988768,
740
+ "learning_rate": 2.6468363197499458e-05,
741
+ "loss": 0.3488,
742
+ "step": 2250
743
+ },
744
+ {
745
+ "epoch": 33.57664233576642,
746
+ "grad_norm": 0.0012639207998290658,
747
+ "learning_rate": 2.1266544696395582e-05,
748
+ "loss": 0.3488,
749
+ "step": 2300
750
+ },
751
+ {
752
+ "epoch": 34.306569343065696,
753
+ "grad_norm": 0.0011322245700284839,
754
+ "learning_rate": 1.659458400101879e-05,
755
+ "loss": 0.3488,
756
+ "step": 2350
757
+ },
758
+ {
759
+ "epoch": 35.03649635036496,
760
+ "grad_norm": 0.002087602624669671,
761
+ "learning_rate": 1.2471710571470578e-05,
762
+ "loss": 0.3488,
763
+ "step": 2400
764
+ },
765
+ {
766
+ "epoch": 35.03649635036496,
767
+ "eval_accuracy": 0.8498168498168498,
768
+ "eval_confusion_matrix": [
769
+ [
770
+ 61,
771
+ 11,
772
+ 1,
773
+ 2
774
+ ],
775
+ [
776
+ 8,
777
+ 57,
778
+ 10,
779
+ 0
780
+ ],
781
+ [
782
+ 0,
783
+ 9,
784
+ 53,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 0,
790
+ 0,
791
+ 61
792
+ ]
793
+ ],
794
+ "eval_f1": 0.8496942339108237,
795
+ "eval_loss": 0.7856935858726501,
796
+ "eval_precision": 0.8506632615716467,
797
+ "eval_recall": 0.8498168498168498,
798
+ "eval_runtime": 3.788,
799
+ "eval_samples_per_second": 72.069,
800
+ "eval_steps_per_second": 0.792,
801
+ "step": 2400
802
+ }
803
+ ],
804
+ "logging_steps": 50,
805
+ "max_steps": 2720,
806
+ "num_input_tokens_seen": 0,
807
+ "num_train_epochs": 40,
808
+ "save_steps": 200,
809
+ "stateful_callbacks": {
810
+ "EarlyStoppingCallback": {
811
+ "args": {
812
+ "early_stopping_patience": 5,
813
+ "early_stopping_threshold": 0.001
814
+ },
815
+ "attributes": {
816
+ "early_stopping_patience_counter": 0
817
+ }
818
+ },
819
+ "TrainerControl": {
820
+ "args": {
821
+ "should_epoch_stop": false,
822
+ "should_evaluate": false,
823
+ "should_log": false,
824
+ "should_save": true,
825
+ "should_training_stop": false
826
+ },
827
+ "attributes": {}
828
+ }
829
+ },
830
+ "total_flos": 8.68527123264e+16,
831
+ "train_batch_size": 8,
832
+ "trial_name": null,
833
+ "trial_params": null
834
+ }
checkpoint-2400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cd6053c9e08edd46715ea6144f2b03c4f4ac31b547b8ae042d0b5e4a21ad64
3
+ size 5240
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58cf140280d70389e1aece2ee9a69bdfb705db914d4944c5f4efd478daa1fd13
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c75935ca7cf6cf2624c007d676c6f4095f4ab57f426f3d85e0a4fe5b16078bb
3
  size 94765560
runs/Sep14_17-06-50_ubumarcos/events.out.tfevents.1726327100.ubumarcos CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d62f4c346935efa7b0e7b0b76b2c91431f8f67b53dc9c6fc95a650671d29a17e
3
- size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75de08e534bbfb12e3a8fc006cb6ab6612e4998c46526c752f20021f20b7d043
3
+ size 503
runs/Sep14_18-07-00_ubumarcos/events.out.tfevents.1726330021.ubumarcos ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7aef52f1e01abd8b1f2974e343dfab563bfe7b9dceb1a4e16c6eaea7954c87b
3
+ size 22146
runs/Sep14_18-24-29_ubumarcos/events.out.tfevents.1726331070.ubumarcos ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:623ee29ee6d95a119d7d22770b2fa5d05167ef523e56637865d7dd27219b6622
3
+ size 6921
runs/Sep14_18-24-29_ubumarcos/events.out.tfevents.1726331984.ubumarcos ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4fede9fcdd631a8ec2518ba9c9ab15f1bfc3400996d5c026e5f67af3e9fe841
3
+ size 40
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04c36e688104e01b3bf86e2899b93cb9a0868d0f8f810b28125b46e47948bf14
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8b55b425459f7b65521e542a14dc1670fa162e08d497ee83bde1c914e74cf6
3
  size 5240