Marcos12886 commited on
Commit
a0c9418
1 Parent(s): 08fe348

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +15 -15
  2. checkpoint-121/model.safetensors +1 -1
  3. checkpoint-121/optimizer.pt +1 -1
  4. checkpoint-121/scheduler.pt +1 -1
  5. checkpoint-121/trainer_state.json +43 -43
  6. checkpoint-121/training_args.bin +1 -1
  7. checkpoint-145/model.safetensors +1 -1
  8. checkpoint-145/optimizer.pt +1 -1
  9. checkpoint-145/scheduler.pt +1 -1
  10. checkpoint-145/trainer_state.json +51 -51
  11. checkpoint-145/training_args.bin +1 -1
  12. checkpoint-169/model.safetensors +1 -1
  13. checkpoint-169/optimizer.pt +1 -1
  14. checkpoint-169/rng_state.pth +1 -1
  15. checkpoint-169/scheduler.pt +1 -1
  16. checkpoint-169/trainer_state.json +73 -97
  17. checkpoint-169/training_args.bin +1 -1
  18. checkpoint-194/model.safetensors +1 -1
  19. checkpoint-194/optimizer.pt +1 -1
  20. checkpoint-194/scheduler.pt +1 -1
  21. checkpoint-194/trainer_state.json +68 -68
  22. checkpoint-194/training_args.bin +1 -1
  23. checkpoint-218/model.safetensors +1 -1
  24. checkpoint-218/optimizer.pt +1 -1
  25. checkpoint-218/scheduler.pt +1 -1
  26. checkpoint-218/trainer_state.json +76 -76
  27. checkpoint-218/training_args.bin +1 -1
  28. checkpoint-24/model.safetensors +1 -1
  29. checkpoint-24/optimizer.pt +1 -1
  30. checkpoint-24/scheduler.pt +1 -1
  31. checkpoint-24/trainer_state.json +11 -11
  32. checkpoint-24/training_args.bin +1 -1
  33. checkpoint-240/config.json +85 -0
  34. checkpoint-240/model.safetensors +3 -0
  35. checkpoint-240/optimizer.pt +3 -0
  36. checkpoint-240/rng_state.pth +3 -0
  37. checkpoint-240/scheduler.pt +3 -0
  38. checkpoint-240/trainer_state.json +162 -0
  39. checkpoint-240/training_args.bin +3 -0
  40. checkpoint-48/model.safetensors +1 -1
  41. checkpoint-48/optimizer.pt +1 -1
  42. checkpoint-48/scheduler.pt +1 -1
  43. checkpoint-48/trainer_state.json +19 -19
  44. checkpoint-48/training_args.bin +1 -1
  45. checkpoint-72/model.safetensors +1 -1
  46. checkpoint-72/optimizer.pt +1 -1
  47. checkpoint-72/scheduler.pt +1 -1
  48. checkpoint-72/trainer_state.json +27 -27
  49. checkpoint-72/training_args.bin +1 -1
  50. checkpoint-97/model.safetensors +1 -1
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.4003
25
- - Accuracy: 0.8603
26
- - Precision: 0.8644
27
- - Recall: 0.8603
28
- - F1: 0.8611
29
 
30
  ## Model description
31
 
@@ -59,16 +59,16 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
61
  |:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
62
- | No log | 0.9617 | 22 | 1.1426 | 0.4877 | 0.3200 | 0.4877 | 0.3244 |
63
- | No log | 1.9672 | 45 | 0.8035 | 0.6877 | 0.6781 | 0.6877 | 0.6726 |
64
- | No log | 2.9727 | 68 | 0.5866 | 0.7370 | 0.7267 | 0.7370 | 0.7179 |
65
- | No log | 3.9781 | 91 | 0.5180 | 0.7945 | 0.7922 | 0.7945 | 0.7853 |
66
- | No log | 4.9836 | 114 | 0.6972 | 0.7233 | 0.7954 | 0.7233 | 0.7300 |
67
- | No log | 5.9891 | 137 | 0.5580 | 0.8055 | 0.8052 | 0.8055 | 0.7958 |
68
- | No log | 6.9945 | 160 | 0.3853 | 0.8603 | 0.8584 | 0.8603 | 0.8584 |
69
- | No log | 8.0 | 183 | 0.4090 | 0.8301 | 0.8415 | 0.8301 | 0.8318 |
70
- | No log | 8.9617 | 205 | 0.3923 | 0.8630 | 0.8660 | 0.8630 | 0.8633 |
71
- | No log | 9.6175 | 220 | 0.4003 | 0.8603 | 0.8644 | 0.8603 | 0.8611 |
72
 
73
 
74
  ### Framework versions
 
21
 
22
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.4341
25
+ - Accuracy: 0.8656
26
+ - Precision: 0.8628
27
+ - Recall: 0.8656
28
+ - F1: 0.8629
29
 
30
  ## Model description
31
 
 
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
61
  |:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
62
+ | No log | 0.9897 | 24 | 1.1718 | 0.4832 | 0.2335 | 0.4832 | 0.3148 |
63
+ | No log | 1.9794 | 48 | 0.7551 | 0.7235 | 0.7164 | 0.7235 | 0.7026 |
64
+ | No log | 2.9691 | 72 | 0.6834 | 0.7313 | 0.7621 | 0.7313 | 0.7074 |
65
+ | No log | 4.0 | 97 | 0.9938 | 0.6202 | 0.6844 | 0.6202 | 0.6161 |
66
+ | No log | 4.9897 | 121 | 0.4973 | 0.7881 | 0.7961 | 0.7881 | 0.7854 |
67
+ | No log | 5.9794 | 145 | 0.5254 | 0.7804 | 0.7843 | 0.7804 | 0.7817 |
68
+ | No log | 6.9691 | 169 | 0.4264 | 0.8475 | 0.8558 | 0.8475 | 0.8504 |
69
+ | No log | 8.0 | 194 | 0.4415 | 0.8656 | 0.8670 | 0.8656 | 0.8628 |
70
+ | No log | 8.9897 | 218 | 0.4115 | 0.8656 | 0.8651 | 0.8656 | 0.8649 |
71
+ | No log | 9.8969 | 240 | 0.4341 | 0.8656 | 0.8628 | 0.8656 | 0.8629 |
72
 
73
 
74
  ### Framework versions
checkpoint-121/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61738703583616f26bc5974045f101b4badf661cb1bd5759b55ffedbf42f8309
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70018b912cc31f04407d1e82b6579b497cb0a4c9d1fe58cce991fc2459540c94
3
  size 94765560
checkpoint-121/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04d7aa37706181567dea5d9c17d0a79ae70a3f43b546fd604def5a5c896f325
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:997c0052dc18c15b0c00ff6b7f5f979b1f10ee85d5dd478ac31aec3c59104615
3
  size 189556666
checkpoint-121/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c4a0a93ff2c02ad8dfec37403a0eff289573d406127ab0a60fb4386bc8bcb2b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9c5f2a04362b085af0242d72c5af05a769e7bae4cee46ee7d346c9df7f1eba
3
  size 1064
checkpoint-121/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8010335917312662,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 4.989690721649485,
5
  "eval_steps": 500,
@@ -10,69 +10,69 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  }
71
  ],
72
  "logging_steps": 500,
73
- "max_steps": 360,
74
  "num_input_tokens_seen": 0,
75
- "num_train_epochs": 15,
76
  "save_steps": 500,
77
  "stateful_callbacks": {
78
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.7881136950904393,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 4.989690721649485,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6201550387596899,
50
+ "eval_f1": 0.616059564047211,
51
+ "eval_loss": 0.9937827587127686,
52
+ "eval_precision": 0.6843785822798868,
53
+ "eval_recall": 0.6201550387596899,
54
+ "eval_runtime": 1.5998,
55
+ "eval_samples_per_second": 241.901,
56
+ "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7881136950904393,
62
+ "eval_f1": 0.7854190251131377,
63
+ "eval_loss": 0.4972754120826721,
64
+ "eval_precision": 0.7961139879912458,
65
+ "eval_recall": 0.7881136950904393,
66
+ "eval_runtime": 1.5913,
67
+ "eval_samples_per_second": 243.196,
68
+ "eval_steps_per_second": 30.792,
69
  "step": 121
70
  }
71
  ],
72
  "logging_steps": 500,
73
+ "max_steps": 240,
74
  "num_input_tokens_seen": 0,
75
+ "num_train_epochs": 10,
76
  "save_steps": 500,
77
  "stateful_callbacks": {
78
  "EarlyStoppingCallback": {
checkpoint-121/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-145/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62b1cac8d8be6152086485540750316f43a58660bddd11c298bb50d0f3f5f531
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b4c786da0e0851be69580368fd5d534a86467be3071d9ea96324ca5e0176a7
3
  size 94765560
checkpoint-145/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f74fa4c7ca2e76ec752e5b913444b39e61cef0b5760afb453b5c55805b47cacc
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a673f8f7ab1809cc9b549b3982b0a64589cbdccccf65a873c995d3b5ba6af28d
3
  size 189556666
checkpoint-145/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62e2160d5bd28532b533e3efabec8ccad052eb12401ff324f1c912ac3e74c929
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ce77a15ff28da0b94b70192478b0d3dfb304ac1b9a3e90e47f0a9e682aca696
3
  size 1064
checkpoint-145/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8010335917312662,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 5.979381443298969,
5
  "eval_steps": 500,
@@ -10,81 +10,81 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  }
83
  ],
84
  "logging_steps": 500,
85
- "max_steps": 360,
86
  "num_input_tokens_seen": 0,
87
- "num_train_epochs": 15,
88
  "save_steps": 500,
89
  "stateful_callbacks": {
90
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.7881136950904393,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 5.979381443298969,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6201550387596899,
50
+ "eval_f1": 0.616059564047211,
51
+ "eval_loss": 0.9937827587127686,
52
+ "eval_precision": 0.6843785822798868,
53
+ "eval_recall": 0.6201550387596899,
54
+ "eval_runtime": 1.5998,
55
+ "eval_samples_per_second": 241.901,
56
+ "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7881136950904393,
62
+ "eval_f1": 0.7854190251131377,
63
+ "eval_loss": 0.4972754120826721,
64
+ "eval_precision": 0.7961139879912458,
65
+ "eval_recall": 0.7881136950904393,
66
+ "eval_runtime": 1.5913,
67
+ "eval_samples_per_second": 243.196,
68
+ "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.7803617571059431,
74
+ "eval_f1": 0.781727235222138,
75
+ "eval_loss": 0.5254013538360596,
76
+ "eval_precision": 0.7843017570642586,
77
+ "eval_recall": 0.7803617571059431,
78
+ "eval_runtime": 1.6096,
79
+ "eval_samples_per_second": 240.428,
80
+ "eval_steps_per_second": 30.442,
81
  "step": 145
82
  }
83
  ],
84
  "logging_steps": 500,
85
+ "max_steps": 240,
86
  "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 10,
88
  "save_steps": 500,
89
  "stateful_callbacks": {
90
  "EarlyStoppingCallback": {
checkpoint-145/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-169/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57abe97d93d5519d7d9477681f8db2f99cf47daf96fd63ca516ef55b3e3f9045
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddbb394005b9ac70d5bc17bcb01362284d0d2278a1e44b6ce7c3513e050d418
3
  size 94765560
checkpoint-169/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4802e1fb9625031658b7796945aeb5ba21b25214affa519eae03e83c38d1acc7
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e66e987ee75130be9aa72386f5d1880fcb1b2c247117c7798a2a85074ab7eba
3
  size 189556666
checkpoint-169/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9f68c26a4ae1b782074e23956e72eabe9fc24b5e5668651fa004052923dfaa
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d85d4b435d4ac4552bb46f32d5f63a55dbc65baaa5af6a14b6b39e968f4b8e
3
  size 14308
checkpoint-169/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e420c01bd37908254d6db59a8dc2bbd2ef1511c17e4668e659e1910a4f12ae20
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df821f017f2db25687bec6d111c0161751da33a1a44b650c4ff88a56a7c16d8f
3
  size 1064
checkpoint-169/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.8145695364238411,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
4
- "epoch": 8.95364238410596,
5
  "eval_steps": 500,
6
  "global_step": 169,
7
  "is_hyper_param_search": false,
@@ -9,116 +9,92 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.9536423841059603,
13
- "eval_accuracy": 0.5298013245033113,
14
- "eval_f1": 0.43216404525386315,
15
- "eval_loss": 1.1326755285263062,
16
- "eval_precision": 0.5213817284211205,
17
- "eval_recall": 0.5298013245033113,
18
- "eval_runtime": 1.2241,
19
- "eval_samples_per_second": 246.709,
20
- "eval_steps_per_second": 31.043,
21
- "step": 18
22
  },
23
  {
24
- "epoch": 1.9602649006622517,
25
- "eval_accuracy": 0.6423841059602649,
26
- "eval_f1": 0.5806184720425087,
27
- "eval_loss": 0.9228919744491577,
28
- "eval_precision": 0.5520930002801896,
29
- "eval_recall": 0.6423841059602649,
30
- "eval_runtime": 1.2236,
31
- "eval_samples_per_second": 246.807,
32
- "eval_steps_per_second": 31.055,
33
- "step": 37
34
  },
35
  {
36
- "epoch": 2.966887417218543,
37
- "eval_accuracy": 0.7086092715231788,
38
- "eval_f1": 0.6539391094940458,
39
- "eval_loss": 0.7409619688987732,
40
- "eval_precision": 0.752516290193287,
41
- "eval_recall": 0.7086092715231788,
42
- "eval_runtime": 1.2459,
43
- "eval_samples_per_second": 242.403,
44
- "eval_steps_per_second": 30.501,
45
- "step": 56
46
  },
47
  {
48
- "epoch": 3.9735099337748343,
49
- "eval_accuracy": 0.7450331125827815,
50
- "eval_f1": 0.7012377717797856,
51
- "eval_loss": 0.6461689472198486,
52
- "eval_precision": 0.7242129191632504,
53
- "eval_recall": 0.7450331125827815,
54
- "eval_runtime": 1.229,
55
- "eval_samples_per_second": 245.723,
56
- "eval_steps_per_second": 30.919,
57
- "step": 75
58
  },
59
  {
60
- "epoch": 4.9801324503311255,
61
- "eval_accuracy": 0.7980132450331126,
62
- "eval_f1": 0.7903709596982513,
63
- "eval_loss": 0.5553261041641235,
64
- "eval_precision": 0.7925903096412185,
65
- "eval_recall": 0.7980132450331126,
66
- "eval_runtime": 1.2897,
67
- "eval_samples_per_second": 234.157,
68
- "eval_steps_per_second": 29.463,
69
- "step": 94
70
  },
71
  {
72
- "epoch": 5.986754966887418,
73
- "eval_accuracy": 0.7781456953642384,
74
- "eval_f1": 0.7717607879297459,
75
- "eval_loss": 0.5255588293075562,
76
- "eval_precision": 0.7771454278224522,
77
- "eval_recall": 0.7781456953642384,
78
- "eval_runtime": 1.2928,
79
- "eval_samples_per_second": 233.597,
80
- "eval_steps_per_second": 29.393,
81
- "step": 113
82
  },
83
  {
84
- "epoch": 6.993377483443709,
85
- "eval_accuracy": 0.7980132450331126,
86
- "eval_f1": 0.7833793670187674,
87
- "eval_loss": 0.5077652335166931,
88
- "eval_precision": 0.7917508237685551,
89
- "eval_recall": 0.7980132450331126,
90
- "eval_runtime": 1.2898,
91
- "eval_samples_per_second": 234.154,
92
- "eval_steps_per_second": 29.463,
93
- "step": 132
94
- },
95
- {
96
- "epoch": 8.0,
97
- "eval_accuracy": 0.8112582781456954,
98
- "eval_f1": 0.8021247299665692,
99
- "eval_loss": 0.4742371141910553,
100
- "eval_precision": 0.8054865043662888,
101
- "eval_recall": 0.8112582781456954,
102
- "eval_runtime": 1.381,
103
- "eval_samples_per_second": 218.682,
104
- "eval_steps_per_second": 27.516,
105
- "step": 151
106
- },
107
- {
108
- "epoch": 8.95364238410596,
109
- "eval_accuracy": 0.8145695364238411,
110
- "eval_f1": 0.805819805920304,
111
- "eval_loss": 0.4742475152015686,
112
- "eval_precision": 0.8065208989148904,
113
- "eval_recall": 0.8145695364238411,
114
- "eval_runtime": 1.2663,
115
- "eval_samples_per_second": 238.482,
116
- "eval_steps_per_second": 30.008,
117
  "step": 169
118
  }
119
  ],
120
  "logging_steps": 500,
121
- "max_steps": 180,
122
  "num_input_tokens_seen": 0,
123
  "num_train_epochs": 10,
124
  "save_steps": 500,
@@ -143,7 +119,7 @@
143
  "attributes": {}
144
  }
145
  },
146
- "total_flos": 2.472539116032e+16,
147
  "train_batch_size": 8,
148
  "trial_name": null,
149
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8475452196382429,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
4
+ "epoch": 6.969072164948454,
5
  "eval_steps": 500,
6
  "global_step": 169,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
+ "step": 24
22
  },
23
  {
24
+ "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
+ "step": 48
34
  },
35
  {
36
+ "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
+ "step": 72
46
  },
47
  {
48
+ "epoch": 4.0,
49
+ "eval_accuracy": 0.6201550387596899,
50
+ "eval_f1": 0.616059564047211,
51
+ "eval_loss": 0.9937827587127686,
52
+ "eval_precision": 0.6843785822798868,
53
+ "eval_recall": 0.6201550387596899,
54
+ "eval_runtime": 1.5998,
55
+ "eval_samples_per_second": 241.901,
56
+ "eval_steps_per_second": 30.628,
57
+ "step": 97
58
  },
59
  {
60
+ "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7881136950904393,
62
+ "eval_f1": 0.7854190251131377,
63
+ "eval_loss": 0.4972754120826721,
64
+ "eval_precision": 0.7961139879912458,
65
+ "eval_recall": 0.7881136950904393,
66
+ "eval_runtime": 1.5913,
67
+ "eval_samples_per_second": 243.196,
68
+ "eval_steps_per_second": 30.792,
69
+ "step": 121
70
  },
71
  {
72
+ "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.7803617571059431,
74
+ "eval_f1": 0.781727235222138,
75
+ "eval_loss": 0.5254013538360596,
76
+ "eval_precision": 0.7843017570642586,
77
+ "eval_recall": 0.7803617571059431,
78
+ "eval_runtime": 1.6096,
79
+ "eval_samples_per_second": 240.428,
80
+ "eval_steps_per_second": 30.442,
81
+ "step": 145
82
  },
83
  {
84
+ "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.8475452196382429,
86
+ "eval_f1": 0.8504309219603966,
87
+ "eval_loss": 0.42636802792549133,
88
+ "eval_precision": 0.8557626671638175,
89
+ "eval_recall": 0.8475452196382429,
90
+ "eval_runtime": 1.6741,
91
+ "eval_samples_per_second": 231.171,
92
+ "eval_steps_per_second": 29.27,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  "step": 169
94
  }
95
  ],
96
  "logging_steps": 500,
97
+ "max_steps": 240,
98
  "num_input_tokens_seen": 0,
99
  "num_train_epochs": 10,
100
  "save_steps": 500,
 
119
  "attributes": {}
120
  }
121
  },
122
+ "total_flos": 2.45957602464e+16,
123
  "train_batch_size": 8,
124
  "trial_name": null,
125
  "trial_params": null
checkpoint-169/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8f2331f3c3c1c25969cfb888574c70dd0e5a19519d8cecb6198afe5225b5a53
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-194/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d03ee65315496952ebb447ef8a2d41c18b1e5e1649887b43e9d670daab42cd79
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a627702cd14b16dbfa2578e7673bd3814fac0eb9375d373c2f06a0a1d5a738
3
  size 94765560
checkpoint-194/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21b9a71b29339ce815e0272ac73c5e9382e8e4b94e000d00c2c15c46fcaf1bb1
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96506f68c35bc380113ed68f683cfdb89562b8e18cd99f04207e7f2cf6c07543
3
  size 189556666
checkpoint-194/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cf3f9f9e1b079187c143133062b7476177d05ae3b26e39797b7b8deffda481f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9baadb1a4e65e06e7e3878b0e06173eb575209134ab9848c9fd367b8c2a762e0
3
  size 1064
checkpoint-194/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8397932816537468,
3
- "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
6
  "global_step": 194,
@@ -10,105 +10,105 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8397932816537468,
86
- "eval_f1": 0.8354840008265724,
87
- "eval_loss": 0.44080850481987,
88
- "eval_precision": 0.8365717854569443,
89
- "eval_recall": 0.8397932816537468,
90
- "eval_runtime": 1.6785,
91
- "eval_samples_per_second": 230.56,
92
- "eval_steps_per_second": 29.192,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8191214470284238,
98
- "eval_f1": 0.8167837576120855,
99
- "eval_loss": 0.46871018409729004,
100
- "eval_precision": 0.8187643627479353,
101
- "eval_recall": 0.8191214470284238,
102
- "eval_runtime": 1.6719,
103
- "eval_samples_per_second": 231.477,
104
- "eval_steps_per_second": 29.308,
105
  "step": 194
106
  }
107
  ],
108
  "logging_steps": 500,
109
- "max_steps": 360,
110
  "num_input_tokens_seen": 0,
111
- "num_train_epochs": 15,
112
  "save_steps": 500,
113
  "stateful_callbacks": {
114
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.8656330749354005,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
6
  "global_step": 194,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6201550387596899,
50
+ "eval_f1": 0.616059564047211,
51
+ "eval_loss": 0.9937827587127686,
52
+ "eval_precision": 0.6843785822798868,
53
+ "eval_recall": 0.6201550387596899,
54
+ "eval_runtime": 1.5998,
55
+ "eval_samples_per_second": 241.901,
56
+ "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7881136950904393,
62
+ "eval_f1": 0.7854190251131377,
63
+ "eval_loss": 0.4972754120826721,
64
+ "eval_precision": 0.7961139879912458,
65
+ "eval_recall": 0.7881136950904393,
66
+ "eval_runtime": 1.5913,
67
+ "eval_samples_per_second": 243.196,
68
+ "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.7803617571059431,
74
+ "eval_f1": 0.781727235222138,
75
+ "eval_loss": 0.5254013538360596,
76
+ "eval_precision": 0.7843017570642586,
77
+ "eval_recall": 0.7803617571059431,
78
+ "eval_runtime": 1.6096,
79
+ "eval_samples_per_second": 240.428,
80
+ "eval_steps_per_second": 30.442,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.8475452196382429,
86
+ "eval_f1": 0.8504309219603966,
87
+ "eval_loss": 0.42636802792549133,
88
+ "eval_precision": 0.8557626671638175,
89
+ "eval_recall": 0.8475452196382429,
90
+ "eval_runtime": 1.6741,
91
+ "eval_samples_per_second": 231.171,
92
+ "eval_steps_per_second": 29.27,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.8656330749354005,
98
+ "eval_f1": 0.8627731371728347,
99
+ "eval_loss": 0.4414582848548889,
100
+ "eval_precision": 0.8670254176803228,
101
+ "eval_recall": 0.8656330749354005,
102
+ "eval_runtime": 1.6573,
103
+ "eval_samples_per_second": 233.512,
104
+ "eval_steps_per_second": 29.566,
105
  "step": 194
106
  }
107
  ],
108
  "logging_steps": 500,
109
+ "max_steps": 240,
110
  "num_input_tokens_seen": 0,
111
+ "num_train_epochs": 10,
112
  "save_steps": 500,
113
  "stateful_callbacks": {
114
  "EarlyStoppingCallback": {
checkpoint-194/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-218/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f22baf06235c468452afb48784bcfe17b8d6b16547f92c897a6b83b9d4be1927
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28660ecbedd8800f912a5673efca37a66998ebce7acd99c4bc922036560b2975
3
  size 94765560
checkpoint-218/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b107c786ae699e053540620707226aced653806a534f7bf2c3d041d5a799d0d
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22cdbaee61cabc6fda6bb731be8bc654cb40396c4c4e2be92d86d4a184ef0728
3
  size 189556666
checkpoint-218/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e51016c1626b13f8e0c2274df9c63eb74ef480d103cd1eaea425e0978c8cf392
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05468f3e42af9ca556e835b8f8bb03232840d74963a2a7b94013d776ea21eb80
3
  size 1064
checkpoint-218/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8475452196382429,
3
- "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-218",
4
  "epoch": 8.989690721649485,
5
  "eval_steps": 500,
6
  "global_step": 218,
@@ -10,117 +10,117 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8397932816537468,
86
- "eval_f1": 0.8354840008265724,
87
- "eval_loss": 0.44080850481987,
88
- "eval_precision": 0.8365717854569443,
89
- "eval_recall": 0.8397932816537468,
90
- "eval_runtime": 1.6785,
91
- "eval_samples_per_second": 230.56,
92
- "eval_steps_per_second": 29.192,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8191214470284238,
98
- "eval_f1": 0.8167837576120855,
99
- "eval_loss": 0.46871018409729004,
100
- "eval_precision": 0.8187643627479353,
101
- "eval_recall": 0.8191214470284238,
102
- "eval_runtime": 1.6719,
103
- "eval_samples_per_second": 231.477,
104
- "eval_steps_per_second": 29.308,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
- "eval_accuracy": 0.8475452196382429,
110
- "eval_f1": 0.8473710740005564,
111
- "eval_loss": 0.4363822937011719,
112
- "eval_precision": 0.8483238707679635,
113
- "eval_recall": 0.8475452196382429,
114
- "eval_runtime": 1.6838,
115
- "eval_samples_per_second": 229.835,
116
- "eval_steps_per_second": 29.101,
117
  "step": 218
118
  }
119
  ],
120
  "logging_steps": 500,
121
- "max_steps": 360,
122
  "num_input_tokens_seen": 0,
123
- "num_train_epochs": 15,
124
  "save_steps": 500,
125
  "stateful_callbacks": {
126
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.8656330749354005,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
  "epoch": 8.989690721649485,
5
  "eval_steps": 500,
6
  "global_step": 218,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6201550387596899,
50
+ "eval_f1": 0.616059564047211,
51
+ "eval_loss": 0.9937827587127686,
52
+ "eval_precision": 0.6843785822798868,
53
+ "eval_recall": 0.6201550387596899,
54
+ "eval_runtime": 1.5998,
55
+ "eval_samples_per_second": 241.901,
56
+ "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7881136950904393,
62
+ "eval_f1": 0.7854190251131377,
63
+ "eval_loss": 0.4972754120826721,
64
+ "eval_precision": 0.7961139879912458,
65
+ "eval_recall": 0.7881136950904393,
66
+ "eval_runtime": 1.5913,
67
+ "eval_samples_per_second": 243.196,
68
+ "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.7803617571059431,
74
+ "eval_f1": 0.781727235222138,
75
+ "eval_loss": 0.5254013538360596,
76
+ "eval_precision": 0.7843017570642586,
77
+ "eval_recall": 0.7803617571059431,
78
+ "eval_runtime": 1.6096,
79
+ "eval_samples_per_second": 240.428,
80
+ "eval_steps_per_second": 30.442,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.8475452196382429,
86
+ "eval_f1": 0.8504309219603966,
87
+ "eval_loss": 0.42636802792549133,
88
+ "eval_precision": 0.8557626671638175,
89
+ "eval_recall": 0.8475452196382429,
90
+ "eval_runtime": 1.6741,
91
+ "eval_samples_per_second": 231.171,
92
+ "eval_steps_per_second": 29.27,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.8656330749354005,
98
+ "eval_f1": 0.8627731371728347,
99
+ "eval_loss": 0.4414582848548889,
100
+ "eval_precision": 0.8670254176803228,
101
+ "eval_recall": 0.8656330749354005,
102
+ "eval_runtime": 1.6573,
103
+ "eval_samples_per_second": 233.512,
104
+ "eval_steps_per_second": 29.566,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8656330749354005,
110
+ "eval_f1": 0.8648808138390941,
111
+ "eval_loss": 0.4115408658981323,
112
+ "eval_precision": 0.8650901526054525,
113
+ "eval_recall": 0.8656330749354005,
114
+ "eval_runtime": 1.6461,
115
+ "eval_samples_per_second": 235.106,
116
+ "eval_steps_per_second": 29.768,
117
  "step": 218
118
  }
119
  ],
120
  "logging_steps": 500,
121
+ "max_steps": 240,
122
  "num_input_tokens_seen": 0,
123
+ "num_train_epochs": 10,
124
  "save_steps": 500,
125
  "stateful_callbacks": {
126
  "EarlyStoppingCallback": {
checkpoint-218/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-24/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3bee6833ba8b12da8d51850147bb3c3b153eec346dfd3b0fda4ff730aa9026
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95db9670c0e00359e684b13ba791d256154dc75e057deabe4d20fd9d0554235
3
  size 94765560
checkpoint-24/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:856d9a47a6db002c6e55d97a99db231cee10d1fef5c0909f7d527b86c30af746
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a82ca177e922f7eb2017da7cfd0f85cb2f6f94396cd239a01bca3e3798f310b8
3
  size 189556666
checkpoint-24/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a45410ab60db58a246b1af794984e39ea7655e4cee627c4cdfe5bd40727f72a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:199b7ea3cf1a15c2128c334c968f9f4a32fee4fdb9c39d77658a7126eada7cf8
3
  size 1064
checkpoint-24/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6356589147286822,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
4
  "epoch": 0.9896907216494846,
5
  "eval_steps": 500,
@@ -10,21 +10,21 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  }
23
  ],
24
  "logging_steps": 500,
25
- "max_steps": 360,
26
  "num_input_tokens_seen": 0,
27
- "num_train_epochs": 15,
28
  "save_steps": 500,
29
  "stateful_callbacks": {
30
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.48320413436692505,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
4
  "epoch": 0.9896907216494846,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  }
23
  ],
24
  "logging_steps": 500,
25
+ "max_steps": 240,
26
  "num_input_tokens_seen": 0,
27
+ "num_train_epochs": 10,
28
  "save_steps": 500,
29
  "stateful_callbacks": {
30
  "EarlyStoppingCallback": {
checkpoint-24/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-240/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "1s_asphyxia": 3,
62
+ "1s_hunger": 2,
63
+ "1s_normal": 0,
64
+ "1s_pain": 1
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-240/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92d699cc9f111730f27e926e204d785c6edcdaa6db0e4e31a03b31178120be85
3
+ size 94765560
checkpoint-240/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02265945a9b9df7108c893934c9f1e83b666674d8cf2044e05bfb54774f0ec5e
3
+ size 189556666
checkpoint-240/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa08735989c33a0829468f970a7eea2caca2871ec3d3c1f5b6fd56289f75077f
3
+ size 14308
checkpoint-240/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa3fb3e2f8633048c1c6df5f62d8f415873cae8a8bc433ff826bcb996ed1b35b
3
+ size 1064
checkpoint-240/trainer_state.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8656330749354005,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
+ "epoch": 9.896907216494846,
5
+ "eval_steps": 500,
6
+ "global_step": 240,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
+ "step": 24
22
+ },
23
+ {
24
+ "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
+ "step": 48
34
+ },
35
+ {
36
+ "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
+ "step": 72
46
+ },
47
+ {
48
+ "epoch": 4.0,
49
+ "eval_accuracy": 0.6201550387596899,
50
+ "eval_f1": 0.616059564047211,
51
+ "eval_loss": 0.9937827587127686,
52
+ "eval_precision": 0.6843785822798868,
53
+ "eval_recall": 0.6201550387596899,
54
+ "eval_runtime": 1.5998,
55
+ "eval_samples_per_second": 241.901,
56
+ "eval_steps_per_second": 30.628,
57
+ "step": 97
58
+ },
59
+ {
60
+ "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7881136950904393,
62
+ "eval_f1": 0.7854190251131377,
63
+ "eval_loss": 0.4972754120826721,
64
+ "eval_precision": 0.7961139879912458,
65
+ "eval_recall": 0.7881136950904393,
66
+ "eval_runtime": 1.5913,
67
+ "eval_samples_per_second": 243.196,
68
+ "eval_steps_per_second": 30.792,
69
+ "step": 121
70
+ },
71
+ {
72
+ "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.7803617571059431,
74
+ "eval_f1": 0.781727235222138,
75
+ "eval_loss": 0.5254013538360596,
76
+ "eval_precision": 0.7843017570642586,
77
+ "eval_recall": 0.7803617571059431,
78
+ "eval_runtime": 1.6096,
79
+ "eval_samples_per_second": 240.428,
80
+ "eval_steps_per_second": 30.442,
81
+ "step": 145
82
+ },
83
+ {
84
+ "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.8475452196382429,
86
+ "eval_f1": 0.8504309219603966,
87
+ "eval_loss": 0.42636802792549133,
88
+ "eval_precision": 0.8557626671638175,
89
+ "eval_recall": 0.8475452196382429,
90
+ "eval_runtime": 1.6741,
91
+ "eval_samples_per_second": 231.171,
92
+ "eval_steps_per_second": 29.27,
93
+ "step": 169
94
+ },
95
+ {
96
+ "epoch": 8.0,
97
+ "eval_accuracy": 0.8656330749354005,
98
+ "eval_f1": 0.8627731371728347,
99
+ "eval_loss": 0.4414582848548889,
100
+ "eval_precision": 0.8670254176803228,
101
+ "eval_recall": 0.8656330749354005,
102
+ "eval_runtime": 1.6573,
103
+ "eval_samples_per_second": 233.512,
104
+ "eval_steps_per_second": 29.566,
105
+ "step": 194
106
+ },
107
+ {
108
+ "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8656330749354005,
110
+ "eval_f1": 0.8648808138390941,
111
+ "eval_loss": 0.4115408658981323,
112
+ "eval_precision": 0.8650901526054525,
113
+ "eval_recall": 0.8656330749354005,
114
+ "eval_runtime": 1.6461,
115
+ "eval_samples_per_second": 235.106,
116
+ "eval_steps_per_second": 29.768,
117
+ "step": 218
118
+ },
119
+ {
120
+ "epoch": 9.896907216494846,
121
+ "eval_accuracy": 0.8656330749354005,
122
+ "eval_f1": 0.8628955278832856,
123
+ "eval_loss": 0.43414339423179626,
124
+ "eval_precision": 0.8627959308862513,
125
+ "eval_recall": 0.8656330749354005,
126
+ "eval_runtime": 1.6443,
127
+ "eval_samples_per_second": 235.354,
128
+ "eval_steps_per_second": 29.799,
129
+ "step": 240
130
+ }
131
+ ],
132
+ "logging_steps": 500,
133
+ "max_steps": 240,
134
+ "num_input_tokens_seen": 0,
135
+ "num_train_epochs": 10,
136
+ "save_steps": 500,
137
+ "stateful_callbacks": {
138
+ "EarlyStoppingCallback": {
139
+ "args": {
140
+ "early_stopping_patience": 3,
141
+ "early_stopping_threshold": 0.0
142
+ },
143
+ "attributes": {
144
+ "early_stopping_patience_counter": 0
145
+ }
146
+ },
147
+ "TrainerControl": {
148
+ "args": {
149
+ "should_epoch_stop": false,
150
+ "should_evaluate": false,
151
+ "should_log": false,
152
+ "should_save": true,
153
+ "should_training_stop": true
154
+ },
155
+ "attributes": {}
156
+ }
157
+ },
158
+ "total_flos": 3.478884368832e+16,
159
+ "train_batch_size": 8,
160
+ "trial_name": null,
161
+ "trial_params": null
162
+ }
checkpoint-240/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
+ size 5240
checkpoint-48/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:874cfd79e7f8f79117c607a043371e2575cea36eabf3fab57dfe6214765f959e
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9973cea0a92ccc3ba76ebb1572eeb391cb070cd494b1e82ef996c7e69204f3
3
  size 94765560
checkpoint-48/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d792dfa624618623c122f4b38eba2876c358b26c734e775720bedeee75004233
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f1b47628687588437ef703f63052450c7ccfdc0cef0369b13f2275efdac0b9
3
  size 189556666
checkpoint-48/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee8b157b5f1b2ae4a4ed28de279e432d640c2b1a56a597116f40e8a0eb7df070
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6157ea2d7163d9ac50ea4f077ca9c7fa0d3ce8899911031b31211d905abbd26
3
  size 1064
checkpoint-48/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7157622739018088,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-48",
4
  "epoch": 1.9793814432989691,
5
  "eval_steps": 500,
@@ -10,33 +10,33 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  }
35
  ],
36
  "logging_steps": 500,
37
- "max_steps": 360,
38
  "num_input_tokens_seen": 0,
39
- "num_train_epochs": 15,
40
  "save_steps": 500,
41
  "stateful_callbacks": {
42
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.7235142118863049,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-48",
4
  "epoch": 1.9793814432989691,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
  "step": 48
34
  }
35
  ],
36
  "logging_steps": 500,
37
+ "max_steps": 240,
38
  "num_input_tokens_seen": 0,
39
+ "num_train_epochs": 10,
40
  "save_steps": 500,
41
  "stateful_callbacks": {
42
  "EarlyStoppingCallback": {
checkpoint-48/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-72/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7319a5906007abccfee6b3903d9644c4f1555218c42143551e3fdc5ddafdaeff
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fafe5e8d3a221adf5fdc2f30cb4c69093527425a911ded5ed478ac7f0fbbab8
3
  size 94765560
checkpoint-72/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07d214f9dd1fc10263bc5278f794a09463e71a214c7c58fea77ceb2220d3d450
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5253a07ab7d3da8498ae929967529fe03141787454a800994517fb089f02a794
3
  size 189556666
checkpoint-72/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee44eb10c5d626424dbeae30798ab223335ff2f90371a9f029e92a7d6ba8ece6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfa659e6db81ea93ae5bc5128ce82974afbbc53d7f1ac1e100d0fe2f646dac9c
3
  size 1064
checkpoint-72/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7493540051679587,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-72",
4
  "epoch": 2.9690721649484537,
5
  "eval_steps": 500,
@@ -10,45 +10,45 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  }
47
  ],
48
  "logging_steps": 500,
49
- "max_steps": 360,
50
  "num_input_tokens_seen": 0,
51
- "num_train_epochs": 15,
52
  "save_steps": 500,
53
  "stateful_callbacks": {
54
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.7312661498708011,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-72",
4
  "epoch": 2.9690721649484537,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.48320413436692505,
14
+ "eval_f1": 0.31484032448297905,
15
+ "eval_loss": 1.1717983484268188,
16
+ "eval_precision": 0.23348623546928937,
17
+ "eval_recall": 0.48320413436692505,
18
+ "eval_runtime": 1.6682,
19
+ "eval_samples_per_second": 231.984,
20
+ "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.7235142118863049,
26
+ "eval_f1": 0.7025965349533153,
27
+ "eval_loss": 0.7550917863845825,
28
+ "eval_precision": 0.7163905403042491,
29
+ "eval_recall": 0.7235142118863049,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 241.836,
32
+ "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.7312661498708011,
38
+ "eval_f1": 0.7074435618655927,
39
+ "eval_loss": 0.6834394335746765,
40
+ "eval_precision": 0.7620582207432803,
41
+ "eval_recall": 0.7312661498708011,
42
+ "eval_runtime": 1.604,
43
+ "eval_samples_per_second": 241.278,
44
+ "eval_steps_per_second": 30.549,
45
  "step": 72
46
  }
47
  ],
48
  "logging_steps": 500,
49
+ "max_steps": 240,
50
  "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 10,
52
  "save_steps": 500,
53
  "stateful_callbacks": {
54
  "EarlyStoppingCallback": {
checkpoint-72/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
checkpoint-97/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8acff5615edba2b47fa24da4e5111226931a4f7f775b8443933bf1c1b200335
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c696fbcea3f1eabb848deb6435364111d0671928df2bef22696b10348e54c32
3
  size 94765560