Marcos12886 committed on
Commit
2f8b8ec
1 Parent(s): a0c9418

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +23 -19
  2. checkpoint-121/model.safetensors +1 -1
  3. checkpoint-121/optimizer.pt +1 -1
  4. checkpoint-121/scheduler.pt +1 -1
  5. checkpoint-121/trainer_state.json +43 -43
  6. checkpoint-121/training_args.bin +1 -1
  7. checkpoint-145/model.safetensors +1 -1
  8. checkpoint-145/optimizer.pt +1 -1
  9. checkpoint-145/scheduler.pt +1 -1
  10. checkpoint-145/trainer_state.json +52 -52
  11. checkpoint-145/training_args.bin +1 -1
  12. checkpoint-169/model.safetensors +1 -1
  13. checkpoint-169/optimizer.pt +1 -1
  14. checkpoint-169/scheduler.pt +1 -1
  15. checkpoint-169/trainer_state.json +59 -59
  16. checkpoint-169/training_args.bin +1 -1
  17. checkpoint-194/model.safetensors +1 -1
  18. checkpoint-194/optimizer.pt +1 -1
  19. checkpoint-194/scheduler.pt +1 -1
  20. checkpoint-194/trainer_state.json +67 -67
  21. checkpoint-194/training_args.bin +1 -1
  22. checkpoint-218/model.safetensors +1 -1
  23. checkpoint-218/optimizer.pt +1 -1
  24. checkpoint-218/scheduler.pt +1 -1
  25. checkpoint-218/trainer_state.json +75 -75
  26. checkpoint-218/training_args.bin +1 -1
  27. checkpoint-24/model.safetensors +1 -1
  28. checkpoint-24/optimizer.pt +1 -1
  29. checkpoint-24/scheduler.pt +1 -1
  30. checkpoint-24/trainer_state.json +11 -11
  31. checkpoint-24/training_args.bin +1 -1
  32. checkpoint-242/model.safetensors +1 -1
  33. checkpoint-242/optimizer.pt +1 -1
  34. checkpoint-242/scheduler.pt +1 -1
  35. checkpoint-242/trainer_state.json +81 -81
  36. checkpoint-242/training_args.bin +1 -1
  37. checkpoint-266/model.safetensors +1 -1
  38. checkpoint-266/optimizer.pt +1 -1
  39. checkpoint-266/scheduler.pt +1 -1
  40. checkpoint-266/trainer_state.json +90 -90
  41. checkpoint-266/training_args.bin +1 -1
  42. checkpoint-291/model.safetensors +1 -1
  43. checkpoint-291/optimizer.pt +1 -1
  44. checkpoint-291/scheduler.pt +1 -1
  45. checkpoint-291/trainer_state.json +96 -96
  46. checkpoint-291/training_args.bin +1 -1
  47. checkpoint-315/model.safetensors +1 -1
  48. checkpoint-315/optimizer.pt +1 -1
  49. checkpoint-315/scheduler.pt +1 -1
  50. checkpoint-315/trainer_state.json +105 -105
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.4341
25
- - Accuracy: 0.8656
26
- - Precision: 0.8628
27
- - Recall: 0.8656
28
- - F1: 0.8629
29
 
30
  ## Model description
31
 
@@ -44,7 +44,7 @@ More information needed
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
47
- - learning_rate: 0.0003
48
  - train_batch_size: 8
49
  - eval_batch_size: 8
50
  - seed: 123
@@ -53,22 +53,26 @@ The following hyperparameters were used during training:
53
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
  - lr_scheduler_type: cosine
55
  - lr_scheduler_warmup_ratio: 0.4
56
- - num_epochs: 10
57
 
58
  ### Training results
59
 
60
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
61
- |:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
62
- | No log | 0.9897 | 24 | 1.1718 | 0.4832 | 0.2335 | 0.4832 | 0.3148 |
63
- | No log | 1.9794 | 48 | 0.7551 | 0.7235 | 0.7164 | 0.7235 | 0.7026 |
64
- | No log | 2.9691 | 72 | 0.6834 | 0.7313 | 0.7621 | 0.7313 | 0.7074 |
65
- | No log | 4.0 | 97 | 0.9938 | 0.6202 | 0.6844 | 0.6202 | 0.6161 |
66
- | No log | 4.9897 | 121 | 0.4973 | 0.7881 | 0.7961 | 0.7881 | 0.7854 |
67
- | No log | 5.9794 | 145 | 0.5254 | 0.7804 | 0.7843 | 0.7804 | 0.7817 |
68
- | No log | 6.9691 | 169 | 0.4264 | 0.8475 | 0.8558 | 0.8475 | 0.8504 |
69
- | No log | 8.0 | 194 | 0.4415 | 0.8656 | 0.8670 | 0.8656 | 0.8628 |
70
- | No log | 8.9897 | 218 | 0.4115 | 0.8656 | 0.8651 | 0.8656 | 0.8649 |
71
- | No log | 9.8969 | 240 | 0.4341 | 0.8656 | 0.8628 | 0.8656 | 0.8629 |
 
 
 
 
72
 
73
 
74
  ### Framework versions
 
21
 
22
  This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.4448
25
+ - Accuracy: 0.8553
26
+ - Precision: 0.8527
27
+ - Recall: 0.8553
28
+ - F1: 0.8522
29
 
30
  ## Model description
31
 
 
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
47
+ - learning_rate: 0.0001
48
  - train_batch_size: 8
49
  - eval_batch_size: 8
50
  - seed: 123
 
53
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
  - lr_scheduler_type: cosine
55
  - lr_scheduler_warmup_ratio: 0.4
56
+ - num_epochs: 15
57
 
58
  ### Training results
59
 
60
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
61
+ |:-------------:|:-------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
62
+ | No log | 0.9897 | 24 | 1.3056 | 0.4341 | 0.1885 | 0.4341 | 0.2628 |
63
+ | No log | 1.9794 | 48 | 1.1732 | 0.4341 | 0.1885 | 0.4341 | 0.2628 |
64
+ | No log | 2.9691 | 72 | 0.9256 | 0.6357 | 0.6651 | 0.6357 | 0.5935 |
65
+ | No log | 4.0 | 97 | 0.7872 | 0.6563 | 0.6724 | 0.6563 | 0.6387 |
66
+ | No log | 4.9897 | 121 | 0.6242 | 0.7597 | 0.7615 | 0.7597 | 0.7448 |
67
+ | No log | 5.9794 | 145 | 0.5990 | 0.7726 | 0.8035 | 0.7726 | 0.7744 |
68
+ | No log | 6.9691 | 169 | 0.5286 | 0.7907 | 0.8075 | 0.7907 | 0.7889 |
69
+ | No log | 8.0 | 194 | 0.4616 | 0.8140 | 0.8345 | 0.8140 | 0.8191 |
70
+ | No log | 8.9897 | 218 | 0.5001 | 0.8114 | 0.8142 | 0.8114 | 0.8021 |
71
+ | No log | 9.9794 | 242 | 0.4530 | 0.8165 | 0.8131 | 0.8165 | 0.8126 |
72
+ | No log | 10.9691 | 266 | 0.4203 | 0.8553 | 0.8586 | 0.8553 | 0.8544 |
73
+ | No log | 12.0 | 291 | 0.4621 | 0.8450 | 0.8423 | 0.8450 | 0.8402 |
74
+ | No log | 12.9897 | 315 | 0.4583 | 0.8501 | 0.8493 | 0.8501 | 0.8471 |
75
+ | No log | 13.9794 | 339 | 0.4448 | 0.8553 | 0.8527 | 0.8553 | 0.8522 |
76
 
77
 
78
  ### Framework versions
checkpoint-121/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70018b912cc31f04407d1e82b6579b497cb0a4c9d1fe58cce991fc2459540c94
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1939f9981d9c8176b8e2ef6ece92977f9623abe931e4298abd38d6642f59f5e0
3
  size 94765560
checkpoint-121/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:997c0052dc18c15b0c00ff6b7f5f979b1f10ee85d5dd478ac31aec3c59104615
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97f71385a72419ee05ba30d63e105082967d8ab9f6d1911eb2e7835404967095
3
  size 189556666
checkpoint-121/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f9c5f2a04362b085af0242d72c5af05a769e7bae4cee46ee7d346c9df7f1eba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c1e98f43f6f001d63e1530acd921a7d70d5042210d56e33079392df6fbf6ed
3
  size 1064
checkpoint-121/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7881136950904393,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 4.989690721649485,
5
  "eval_steps": 500,
@@ -10,69 +10,69 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.48320413436692505,
14
- "eval_f1": 0.31484032448297905,
15
- "eval_loss": 1.1717983484268188,
16
- "eval_precision": 0.23348623546928937,
17
- "eval_recall": 0.48320413436692505,
18
- "eval_runtime": 1.6682,
19
- "eval_samples_per_second": 231.984,
20
- "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7235142118863049,
26
- "eval_f1": 0.7025965349533153,
27
- "eval_loss": 0.7550917863845825,
28
- "eval_precision": 0.7163905403042491,
29
- "eval_recall": 0.7235142118863049,
30
- "eval_runtime": 1.6003,
31
- "eval_samples_per_second": 241.836,
32
- "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7312661498708011,
38
- "eval_f1": 0.7074435618655927,
39
- "eval_loss": 0.6834394335746765,
40
- "eval_precision": 0.7620582207432803,
41
- "eval_recall": 0.7312661498708011,
42
- "eval_runtime": 1.604,
43
- "eval_samples_per_second": 241.278,
44
- "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.6201550387596899,
50
- "eval_f1": 0.616059564047211,
51
- "eval_loss": 0.9937827587127686,
52
- "eval_precision": 0.6843785822798868,
53
- "eval_recall": 0.6201550387596899,
54
- "eval_runtime": 1.5998,
55
- "eval_samples_per_second": 241.901,
56
- "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.7881136950904393,
62
- "eval_f1": 0.7854190251131377,
63
- "eval_loss": 0.4972754120826721,
64
- "eval_precision": 0.7961139879912458,
65
- "eval_recall": 0.7881136950904393,
66
- "eval_runtime": 1.5913,
67
- "eval_samples_per_second": 243.196,
68
- "eval_steps_per_second": 30.792,
69
  "step": 121
70
  }
71
  ],
72
  "logging_steps": 500,
73
- "max_steps": 240,
74
  "num_input_tokens_seen": 0,
75
- "num_train_epochs": 10,
76
  "save_steps": 500,
77
  "stateful_callbacks": {
78
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.7596899224806202,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 4.989690721649485,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  }
71
  ],
72
  "logging_steps": 500,
73
+ "max_steps": 360,
74
  "num_input_tokens_seen": 0,
75
+ "num_train_epochs": 15,
76
  "save_steps": 500,
77
  "stateful_callbacks": {
78
  "EarlyStoppingCallback": {
checkpoint-121/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-145/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9b4c786da0e0851be69580368fd5d534a86467be3071d9ea96324ca5e0176a7
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33d599a25f75eb114df73c919291fb52b68c98855b78355db615bcc43f381711
3
  size 94765560
checkpoint-145/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a673f8f7ab1809cc9b549b3982b0a64589cbdccccf65a873c995d3b5ba6af28d
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55eb26e80237b5f7b441b15ae3ac2ba04a224510202eedb6b4f807fcc5598dbc
3
  size 189556666
checkpoint-145/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ce77a15ff28da0b94b70192478b0d3dfb304ac1b9a3e90e47f0a9e682aca696
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ef73ef30733d66c33837a6a1caa2497a300ae95d575b39cf2cd805a422d9ad
3
  size 1064
checkpoint-145/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7881136950904393,
3
- "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
  "epoch": 5.979381443298969,
5
  "eval_steps": 500,
6
  "global_step": 145,
@@ -10,81 +10,81 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.48320413436692505,
14
- "eval_f1": 0.31484032448297905,
15
- "eval_loss": 1.1717983484268188,
16
- "eval_precision": 0.23348623546928937,
17
- "eval_recall": 0.48320413436692505,
18
- "eval_runtime": 1.6682,
19
- "eval_samples_per_second": 231.984,
20
- "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7235142118863049,
26
- "eval_f1": 0.7025965349533153,
27
- "eval_loss": 0.7550917863845825,
28
- "eval_precision": 0.7163905403042491,
29
- "eval_recall": 0.7235142118863049,
30
- "eval_runtime": 1.6003,
31
- "eval_samples_per_second": 241.836,
32
- "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7312661498708011,
38
- "eval_f1": 0.7074435618655927,
39
- "eval_loss": 0.6834394335746765,
40
- "eval_precision": 0.7620582207432803,
41
- "eval_recall": 0.7312661498708011,
42
- "eval_runtime": 1.604,
43
- "eval_samples_per_second": 241.278,
44
- "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.6201550387596899,
50
- "eval_f1": 0.616059564047211,
51
- "eval_loss": 0.9937827587127686,
52
- "eval_precision": 0.6843785822798868,
53
- "eval_recall": 0.6201550387596899,
54
- "eval_runtime": 1.5998,
55
- "eval_samples_per_second": 241.901,
56
- "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.7881136950904393,
62
- "eval_f1": 0.7854190251131377,
63
- "eval_loss": 0.4972754120826721,
64
- "eval_precision": 0.7961139879912458,
65
- "eval_recall": 0.7881136950904393,
66
- "eval_runtime": 1.5913,
67
- "eval_samples_per_second": 243.196,
68
- "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7803617571059431,
74
- "eval_f1": 0.781727235222138,
75
- "eval_loss": 0.5254013538360596,
76
- "eval_precision": 0.7843017570642586,
77
- "eval_recall": 0.7803617571059431,
78
- "eval_runtime": 1.6096,
79
- "eval_samples_per_second": 240.428,
80
- "eval_steps_per_second": 30.442,
81
  "step": 145
82
  }
83
  ],
84
  "logging_steps": 500,
85
- "max_steps": 240,
86
  "num_input_tokens_seen": 0,
87
- "num_train_epochs": 10,
88
  "save_steps": 500,
89
  "stateful_callbacks": {
90
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.772609819121447,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-145",
4
  "epoch": 5.979381443298969,
5
  "eval_steps": 500,
6
  "global_step": 145,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  }
83
  ],
84
  "logging_steps": 500,
85
+ "max_steps": 360,
86
  "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 15,
88
  "save_steps": 500,
89
  "stateful_callbacks": {
90
  "EarlyStoppingCallback": {
checkpoint-145/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-169/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ddbb394005b9ac70d5bc17bcb01362284d0d2278a1e44b6ce7c3513e050d418
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7fe0ba576e277ef89da2b3ca1c5aa2e93a8917ae528f01a03f6dfdf1268ac6a
3
  size 94765560
checkpoint-169/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e66e987ee75130be9aa72386f5d1880fcb1b2c247117c7798a2a85074ab7eba
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0311ff790bd7ab214d52d34a06580321ac9204750d3399ca3313dd76c3892bfc
3
  size 189556666
checkpoint-169/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df821f017f2db25687bec6d111c0161751da33a1a44b650c4ff88a56a7c16d8f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f4cbb3520d6827f9ee7c7377912e6794964491937f949c08d7bee3e3b6378b
3
  size 1064
checkpoint-169/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8475452196382429,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
4
  "epoch": 6.969072164948454,
5
  "eval_steps": 500,
@@ -10,93 +10,93 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.48320413436692505,
14
- "eval_f1": 0.31484032448297905,
15
- "eval_loss": 1.1717983484268188,
16
- "eval_precision": 0.23348623546928937,
17
- "eval_recall": 0.48320413436692505,
18
- "eval_runtime": 1.6682,
19
- "eval_samples_per_second": 231.984,
20
- "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7235142118863049,
26
- "eval_f1": 0.7025965349533153,
27
- "eval_loss": 0.7550917863845825,
28
- "eval_precision": 0.7163905403042491,
29
- "eval_recall": 0.7235142118863049,
30
- "eval_runtime": 1.6003,
31
- "eval_samples_per_second": 241.836,
32
- "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7312661498708011,
38
- "eval_f1": 0.7074435618655927,
39
- "eval_loss": 0.6834394335746765,
40
- "eval_precision": 0.7620582207432803,
41
- "eval_recall": 0.7312661498708011,
42
- "eval_runtime": 1.604,
43
- "eval_samples_per_second": 241.278,
44
- "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.6201550387596899,
50
- "eval_f1": 0.616059564047211,
51
- "eval_loss": 0.9937827587127686,
52
- "eval_precision": 0.6843785822798868,
53
- "eval_recall": 0.6201550387596899,
54
- "eval_runtime": 1.5998,
55
- "eval_samples_per_second": 241.901,
56
- "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.7881136950904393,
62
- "eval_f1": 0.7854190251131377,
63
- "eval_loss": 0.4972754120826721,
64
- "eval_precision": 0.7961139879912458,
65
- "eval_recall": 0.7881136950904393,
66
- "eval_runtime": 1.5913,
67
- "eval_samples_per_second": 243.196,
68
- "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7803617571059431,
74
- "eval_f1": 0.781727235222138,
75
- "eval_loss": 0.5254013538360596,
76
- "eval_precision": 0.7843017570642586,
77
- "eval_recall": 0.7803617571059431,
78
- "eval_runtime": 1.6096,
79
- "eval_samples_per_second": 240.428,
80
- "eval_steps_per_second": 30.442,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8475452196382429,
86
- "eval_f1": 0.8504309219603966,
87
- "eval_loss": 0.42636802792549133,
88
- "eval_precision": 0.8557626671638175,
89
- "eval_recall": 0.8475452196382429,
90
- "eval_runtime": 1.6741,
91
- "eval_samples_per_second": 231.171,
92
- "eval_steps_per_second": 29.27,
93
  "step": 169
94
  }
95
  ],
96
  "logging_steps": 500,
97
- "max_steps": 240,
98
  "num_input_tokens_seen": 0,
99
- "num_train_epochs": 10,
100
  "save_steps": 500,
101
  "stateful_callbacks": {
102
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.7906976744186046,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
4
  "epoch": 6.969072164948454,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  }
95
  ],
96
  "logging_steps": 500,
97
+ "max_steps": 360,
98
  "num_input_tokens_seen": 0,
99
+ "num_train_epochs": 15,
100
  "save_steps": 500,
101
  "stateful_callbacks": {
102
  "EarlyStoppingCallback": {
checkpoint-169/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-194/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3a627702cd14b16dbfa2578e7673bd3814fac0eb9375d373c2f06a0a1d5a738
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baae35578ce27c737b5559c2b66510e84a496d80b00550c0dd5d72fcbb7ced0d
3
  size 94765560
checkpoint-194/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96506f68c35bc380113ed68f683cfdb89562b8e18cd99f04207e7f2cf6c07543
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bde7ea27c2658d4486314948eea84d2e6cb3342425e8a03c5e81c442be2dce5
3
  size 189556666
checkpoint-194/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9baadb1a4e65e06e7e3878b0e06173eb575209134ab9848c9fd367b8c2a762e0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2515f26554ebceb934b5250a537f0e70da384f3e37f2374ff2d98c54b8331c
3
  size 1064
checkpoint-194/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8656330749354005,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
@@ -10,105 +10,105 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.48320413436692505,
14
- "eval_f1": 0.31484032448297905,
15
- "eval_loss": 1.1717983484268188,
16
- "eval_precision": 0.23348623546928937,
17
- "eval_recall": 0.48320413436692505,
18
- "eval_runtime": 1.6682,
19
- "eval_samples_per_second": 231.984,
20
- "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7235142118863049,
26
- "eval_f1": 0.7025965349533153,
27
- "eval_loss": 0.7550917863845825,
28
- "eval_precision": 0.7163905403042491,
29
- "eval_recall": 0.7235142118863049,
30
- "eval_runtime": 1.6003,
31
- "eval_samples_per_second": 241.836,
32
- "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7312661498708011,
38
- "eval_f1": 0.7074435618655927,
39
- "eval_loss": 0.6834394335746765,
40
- "eval_precision": 0.7620582207432803,
41
- "eval_recall": 0.7312661498708011,
42
- "eval_runtime": 1.604,
43
- "eval_samples_per_second": 241.278,
44
- "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.6201550387596899,
50
- "eval_f1": 0.616059564047211,
51
- "eval_loss": 0.9937827587127686,
52
- "eval_precision": 0.6843785822798868,
53
- "eval_recall": 0.6201550387596899,
54
- "eval_runtime": 1.5998,
55
- "eval_samples_per_second": 241.901,
56
- "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.7881136950904393,
62
- "eval_f1": 0.7854190251131377,
63
- "eval_loss": 0.4972754120826721,
64
- "eval_precision": 0.7961139879912458,
65
- "eval_recall": 0.7881136950904393,
66
- "eval_runtime": 1.5913,
67
- "eval_samples_per_second": 243.196,
68
- "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7803617571059431,
74
- "eval_f1": 0.781727235222138,
75
- "eval_loss": 0.5254013538360596,
76
- "eval_precision": 0.7843017570642586,
77
- "eval_recall": 0.7803617571059431,
78
- "eval_runtime": 1.6096,
79
- "eval_samples_per_second": 240.428,
80
- "eval_steps_per_second": 30.442,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8475452196382429,
86
- "eval_f1": 0.8504309219603966,
87
- "eval_loss": 0.42636802792549133,
88
- "eval_precision": 0.8557626671638175,
89
- "eval_recall": 0.8475452196382429,
90
- "eval_runtime": 1.6741,
91
- "eval_samples_per_second": 231.171,
92
- "eval_steps_per_second": 29.27,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8656330749354005,
98
- "eval_f1": 0.8627731371728347,
99
- "eval_loss": 0.4414582848548889,
100
- "eval_precision": 0.8670254176803228,
101
- "eval_recall": 0.8656330749354005,
102
- "eval_runtime": 1.6573,
103
- "eval_samples_per_second": 233.512,
104
- "eval_steps_per_second": 29.566,
105
  "step": 194
106
  }
107
  ],
108
  "logging_steps": 500,
109
- "max_steps": 240,
110
  "num_input_tokens_seen": 0,
111
- "num_train_epochs": 10,
112
  "save_steps": 500,
113
  "stateful_callbacks": {
114
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.813953488372093,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.813953488372093,
98
+ "eval_f1": 0.8191057475344072,
99
+ "eval_loss": 0.461563378572464,
100
+ "eval_precision": 0.8344562802960127,
101
+ "eval_recall": 0.813953488372093,
102
+ "eval_runtime": 1.6538,
103
+ "eval_samples_per_second": 234.004,
104
+ "eval_steps_per_second": 29.628,
105
  "step": 194
106
  }
107
  ],
108
  "logging_steps": 500,
109
+ "max_steps": 360,
110
  "num_input_tokens_seen": 0,
111
+ "num_train_epochs": 15,
112
  "save_steps": 500,
113
  "stateful_callbacks": {
114
  "EarlyStoppingCallback": {
checkpoint-194/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-218/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28660ecbedd8800f912a5673efca37a66998ebce7acd99c4bc922036560b2975
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5129276136473933fb458063a0adbebe29c1ebcb44e427a4cfd6df34f7fe79db
3
  size 94765560
checkpoint-218/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22cdbaee61cabc6fda6bb731be8bc654cb40396c4c4e2be92d86d4a184ef0728
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:653e15316ce9cbc92d4497de1066858cce0103cd3586d854ad0633718f85320c
3
  size 189556666
checkpoint-218/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05468f3e42af9ca556e835b8f8bb03232840d74963a2a7b94013d776ea21eb80
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9b775c41ccbd0ddb1034a30399d9f5182b6429a8e768d1d1ee3277da7ac95d
3
  size 1064
checkpoint-218/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8656330749354005,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
  "epoch": 8.989690721649485,
5
  "eval_steps": 500,
@@ -10,117 +10,117 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.48320413436692505,
14
- "eval_f1": 0.31484032448297905,
15
- "eval_loss": 1.1717983484268188,
16
- "eval_precision": 0.23348623546928937,
17
- "eval_recall": 0.48320413436692505,
18
- "eval_runtime": 1.6682,
19
- "eval_samples_per_second": 231.984,
20
- "eval_steps_per_second": 29.373,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7235142118863049,
26
- "eval_f1": 0.7025965349533153,
27
- "eval_loss": 0.7550917863845825,
28
- "eval_precision": 0.7163905403042491,
29
- "eval_recall": 0.7235142118863049,
30
- "eval_runtime": 1.6003,
31
- "eval_samples_per_second": 241.836,
32
- "eval_steps_per_second": 30.62,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7312661498708011,
38
- "eval_f1": 0.7074435618655927,
39
- "eval_loss": 0.6834394335746765,
40
- "eval_precision": 0.7620582207432803,
41
- "eval_recall": 0.7312661498708011,
42
- "eval_runtime": 1.604,
43
- "eval_samples_per_second": 241.278,
44
- "eval_steps_per_second": 30.549,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.6201550387596899,
50
- "eval_f1": 0.616059564047211,
51
- "eval_loss": 0.9937827587127686,
52
- "eval_precision": 0.6843785822798868,
53
- "eval_recall": 0.6201550387596899,
54
- "eval_runtime": 1.5998,
55
- "eval_samples_per_second": 241.901,
56
- "eval_steps_per_second": 30.628,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.7881136950904393,
62
- "eval_f1": 0.7854190251131377,
63
- "eval_loss": 0.4972754120826721,
64
- "eval_precision": 0.7961139879912458,
65
- "eval_recall": 0.7881136950904393,
66
- "eval_runtime": 1.5913,
67
- "eval_samples_per_second": 243.196,
68
- "eval_steps_per_second": 30.792,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7803617571059431,
74
- "eval_f1": 0.781727235222138,
75
- "eval_loss": 0.5254013538360596,
76
- "eval_precision": 0.7843017570642586,
77
- "eval_recall": 0.7803617571059431,
78
- "eval_runtime": 1.6096,
79
- "eval_samples_per_second": 240.428,
80
- "eval_steps_per_second": 30.442,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8475452196382429,
86
- "eval_f1": 0.8504309219603966,
87
- "eval_loss": 0.42636802792549133,
88
- "eval_precision": 0.8557626671638175,
89
- "eval_recall": 0.8475452196382429,
90
- "eval_runtime": 1.6741,
91
- "eval_samples_per_second": 231.171,
92
- "eval_steps_per_second": 29.27,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8656330749354005,
98
- "eval_f1": 0.8627731371728347,
99
- "eval_loss": 0.4414582848548889,
100
- "eval_precision": 0.8670254176803228,
101
- "eval_recall": 0.8656330749354005,
102
- "eval_runtime": 1.6573,
103
- "eval_samples_per_second": 233.512,
104
- "eval_steps_per_second": 29.566,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
- "eval_accuracy": 0.8656330749354005,
110
- "eval_f1": 0.8648808138390941,
111
- "eval_loss": 0.4115408658981323,
112
- "eval_precision": 0.8650901526054525,
113
- "eval_recall": 0.8656330749354005,
114
- "eval_runtime": 1.6461,
115
- "eval_samples_per_second": 235.106,
116
- "eval_steps_per_second": 29.768,
117
  "step": 218
118
  }
119
  ],
120
  "logging_steps": 500,
121
- "max_steps": 240,
122
  "num_input_tokens_seen": 0,
123
- "num_train_epochs": 10,
124
  "save_steps": 500,
125
  "stateful_callbacks": {
126
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.813953488372093,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
4
  "epoch": 8.989690721649485,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.813953488372093,
98
+ "eval_f1": 0.8191057475344072,
99
+ "eval_loss": 0.461563378572464,
100
+ "eval_precision": 0.8344562802960127,
101
+ "eval_recall": 0.813953488372093,
102
+ "eval_runtime": 1.6538,
103
+ "eval_samples_per_second": 234.004,
104
+ "eval_steps_per_second": 29.628,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8113695090439277,
110
+ "eval_f1": 0.8021117074140329,
111
+ "eval_loss": 0.5000612735748291,
112
+ "eval_precision": 0.8141611295681063,
113
+ "eval_recall": 0.8113695090439277,
114
+ "eval_runtime": 1.6675,
115
+ "eval_samples_per_second": 232.083,
116
+ "eval_steps_per_second": 29.385,
117
  "step": 218
118
  }
119
  ],
120
  "logging_steps": 500,
121
+ "max_steps": 360,
122
  "num_input_tokens_seen": 0,
123
+ "num_train_epochs": 15,
124
  "save_steps": 500,
125
  "stateful_callbacks": {
126
  "EarlyStoppingCallback": {
checkpoint-218/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-24/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d95db9670c0e00359e684b13ba791d256154dc75e057deabe4d20fd9d0554235
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ae367d698cb4b2e6ff776f679b85a1196e77d049b832152d6bd4ad705a5419
3
  size 94765560
checkpoint-24/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a82ca177e922f7eb2017da7cfd0f85cb2f6f94396cd239a01bca3e3798f310b8
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b92d718808a7945f2b041265bbb510cac2fcd60f2bd0f446c630ca9abb3c6ecd
3
  size 189556666
checkpoint-24/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:199b7ea3cf1a15c2128c334c968f9f4a32fee4fdb9c39d77658a7126eada7cf8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5d0d6ddafc4ad00c987b5d1beff7b3561a3dc543c6b67f789c33e6fa494a9d
3
  size 1064
checkpoint-24/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.48320413436692505,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
4
  "epoch": 0.9896907216494846,
5
  "eval_steps": 500,
@@ -10,21 +10,21 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.48320413436692505,
14
- "eval_f1": 0.31484032448297905,
15
- "eval_loss": 1.1717983484268188,
16
- "eval_precision": 0.23348623546928937,
17
- "eval_recall": 0.48320413436692505,
18
- "eval_runtime": 1.6682,
19
- "eval_samples_per_second": 231.984,
20
- "eval_steps_per_second": 29.373,
21
  "step": 24
22
  }
23
  ],
24
  "logging_steps": 500,
25
- "max_steps": 240,
26
  "num_input_tokens_seen": 0,
27
- "num_train_epochs": 10,
28
  "save_steps": 500,
29
  "stateful_callbacks": {
30
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 0.43410852713178294,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
4
  "epoch": 0.9896907216494846,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  }
23
  ],
24
  "logging_steps": 500,
25
+ "max_steps": 360,
26
  "num_input_tokens_seen": 0,
27
+ "num_train_epochs": 15,
28
  "save_steps": 500,
29
  "stateful_callbacks": {
30
  "EarlyStoppingCallback": {
checkpoint-24/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-242/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c3c3acfb39a1584bdad8c4452c6a99cc55d03b67d4754ef55bdc31be94eb5b7
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c039c1b758751ec121fc469bc7547e1982caf925c239f0210a25629f20fcc808
3
  size 94765560
checkpoint-242/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529f05dfd1fdfa07d440221559587dda8eded6dd4e7f89fd777f0ac7f24f5fee
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b3d6cee0f14c6a503cbdbf9fd578d74b21bb5ca1a160a77577df181e0a24ca4
3
  size 189556666
checkpoint-242/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:781bb61f6baa40042311c990cc82713d83cdc179ccb43dbb9cdd148961a2e8ad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a84ec1b939f46a9a8aabb9a272f34dbebd1f7c4378749e925f2e59c5f4fa3e4
3
  size 1064
checkpoint-242/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8578811369509044,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242",
4
  "epoch": 9.97938144329897,
5
  "eval_steps": 500,
@@ -10,122 +10,122 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8397932816537468,
86
- "eval_f1": 0.8354840008265724,
87
- "eval_loss": 0.44080850481987,
88
- "eval_precision": 0.8365717854569443,
89
- "eval_recall": 0.8397932816537468,
90
- "eval_runtime": 1.6785,
91
- "eval_samples_per_second": 230.56,
92
- "eval_steps_per_second": 29.192,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8191214470284238,
98
- "eval_f1": 0.8167837576120855,
99
- "eval_loss": 0.46871018409729004,
100
- "eval_precision": 0.8187643627479353,
101
- "eval_recall": 0.8191214470284238,
102
- "eval_runtime": 1.6719,
103
- "eval_samples_per_second": 231.477,
104
- "eval_steps_per_second": 29.308,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
- "eval_accuracy": 0.8475452196382429,
110
- "eval_f1": 0.8473710740005564,
111
- "eval_loss": 0.4363822937011719,
112
- "eval_precision": 0.8483238707679635,
113
- "eval_recall": 0.8475452196382429,
114
- "eval_runtime": 1.6838,
115
- "eval_samples_per_second": 229.835,
116
- "eval_steps_per_second": 29.101,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
- "eval_accuracy": 0.8578811369509044,
122
- "eval_f1": 0.8567532661685897,
123
- "eval_loss": 0.42906680703163147,
124
- "eval_precision": 0.8560504853170988,
125
- "eval_recall": 0.8578811369509044,
126
- "eval_runtime": 1.6875,
127
- "eval_samples_per_second": 229.333,
128
- "eval_steps_per_second": 29.037,
129
  "step": 242
130
  }
131
  ],
 
1
  {
2
+ "best_metric": 0.8165374677002584,
3
  "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242",
4
  "epoch": 9.97938144329897,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.813953488372093,
98
+ "eval_f1": 0.8191057475344072,
99
+ "eval_loss": 0.461563378572464,
100
+ "eval_precision": 0.8344562802960127,
101
+ "eval_recall": 0.813953488372093,
102
+ "eval_runtime": 1.6538,
103
+ "eval_samples_per_second": 234.004,
104
+ "eval_steps_per_second": 29.628,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8113695090439277,
110
+ "eval_f1": 0.8021117074140329,
111
+ "eval_loss": 0.5000612735748291,
112
+ "eval_precision": 0.8141611295681063,
113
+ "eval_recall": 0.8113695090439277,
114
+ "eval_runtime": 1.6675,
115
+ "eval_samples_per_second": 232.083,
116
+ "eval_steps_per_second": 29.385,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
+ "eval_accuracy": 0.8165374677002584,
122
+ "eval_f1": 0.8125708918966329,
123
+ "eval_loss": 0.4529660940170288,
124
+ "eval_precision": 0.8130985716009446,
125
+ "eval_recall": 0.8165374677002584,
126
+ "eval_runtime": 1.6635,
127
+ "eval_samples_per_second": 232.639,
128
+ "eval_steps_per_second": 29.456,
129
  "step": 242
130
  }
131
  ],
checkpoint-242/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-266/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b40af64761e85d5fea70c649217c8cc140a31cb9ca6b3839f952a2bb9f87e0b2
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27c7790fb0d0e3d63b0164e19322c55c710c87be9b9e7cfd68ac7458a7b8b64
3
  size 94765560
checkpoint-266/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2157435343c02544baabe61713043b7b3749a01b1cc4aeeb2340f89faf040460
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98d5ad73bbffe35a1d38bc982d963fd54e116f0e7fd327bae41f2db23ea9d88
3
  size 189556666
checkpoint-266/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:431e6d6d13cfa84e3db2d479446a897f9f93f4718bfe0ac4b68d73256eda8d91
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa01376065b837c8a8be09b795bba516d496776d4563c1bd964ea469929491d5
3
  size 1064
checkpoint-266/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8578811369509044,
3
- "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242",
4
  "epoch": 10.969072164948454,
5
  "eval_steps": 500,
6
  "global_step": 266,
@@ -10,134 +10,134 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8397932816537468,
86
- "eval_f1": 0.8354840008265724,
87
- "eval_loss": 0.44080850481987,
88
- "eval_precision": 0.8365717854569443,
89
- "eval_recall": 0.8397932816537468,
90
- "eval_runtime": 1.6785,
91
- "eval_samples_per_second": 230.56,
92
- "eval_steps_per_second": 29.192,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8191214470284238,
98
- "eval_f1": 0.8167837576120855,
99
- "eval_loss": 0.46871018409729004,
100
- "eval_precision": 0.8187643627479353,
101
- "eval_recall": 0.8191214470284238,
102
- "eval_runtime": 1.6719,
103
- "eval_samples_per_second": 231.477,
104
- "eval_steps_per_second": 29.308,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
- "eval_accuracy": 0.8475452196382429,
110
- "eval_f1": 0.8473710740005564,
111
- "eval_loss": 0.4363822937011719,
112
- "eval_precision": 0.8483238707679635,
113
- "eval_recall": 0.8475452196382429,
114
- "eval_runtime": 1.6838,
115
- "eval_samples_per_second": 229.835,
116
- "eval_steps_per_second": 29.101,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
- "eval_accuracy": 0.8578811369509044,
122
- "eval_f1": 0.8567532661685897,
123
- "eval_loss": 0.42906680703163147,
124
- "eval_precision": 0.8560504853170988,
125
- "eval_recall": 0.8578811369509044,
126
- "eval_runtime": 1.6875,
127
- "eval_samples_per_second": 229.333,
128
- "eval_steps_per_second": 29.037,
129
  "step": 242
130
  },
131
  {
132
  "epoch": 10.969072164948454,
133
- "eval_accuracy": 0.8501291989664083,
134
- "eval_f1": 0.8527674409653727,
135
- "eval_loss": 0.46987947821617126,
136
- "eval_precision": 0.858217159873796,
137
- "eval_recall": 0.8501291989664083,
138
- "eval_runtime": 1.6795,
139
- "eval_samples_per_second": 230.425,
140
- "eval_steps_per_second": 29.175,
141
  "step": 266
142
  }
143
  ],
 
1
  {
2
+ "best_metric": 0.8552971576227391,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-266",
4
  "epoch": 10.969072164948454,
5
  "eval_steps": 500,
6
  "global_step": 266,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.813953488372093,
98
+ "eval_f1": 0.8191057475344072,
99
+ "eval_loss": 0.461563378572464,
100
+ "eval_precision": 0.8344562802960127,
101
+ "eval_recall": 0.813953488372093,
102
+ "eval_runtime": 1.6538,
103
+ "eval_samples_per_second": 234.004,
104
+ "eval_steps_per_second": 29.628,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8113695090439277,
110
+ "eval_f1": 0.8021117074140329,
111
+ "eval_loss": 0.5000612735748291,
112
+ "eval_precision": 0.8141611295681063,
113
+ "eval_recall": 0.8113695090439277,
114
+ "eval_runtime": 1.6675,
115
+ "eval_samples_per_second": 232.083,
116
+ "eval_steps_per_second": 29.385,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
+ "eval_accuracy": 0.8165374677002584,
122
+ "eval_f1": 0.8125708918966329,
123
+ "eval_loss": 0.4529660940170288,
124
+ "eval_precision": 0.8130985716009446,
125
+ "eval_recall": 0.8165374677002584,
126
+ "eval_runtime": 1.6635,
127
+ "eval_samples_per_second": 232.639,
128
+ "eval_steps_per_second": 29.456,
129
  "step": 242
130
  },
131
  {
132
  "epoch": 10.969072164948454,
133
+ "eval_accuracy": 0.8552971576227391,
134
+ "eval_f1": 0.8544131646946824,
135
+ "eval_loss": 0.4202619791030884,
136
+ "eval_precision": 0.8585534724896462,
137
+ "eval_recall": 0.8552971576227391,
138
+ "eval_runtime": 1.6427,
139
+ "eval_samples_per_second": 235.585,
140
+ "eval_steps_per_second": 29.829,
141
  "step": 266
142
  }
143
  ],
checkpoint-266/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-291/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f47d00b50afe8f783e4bf6f910a770b985446e9c936205d750ce95a2c463cbf3
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0bb3d75286bab3d1cdcb26cb55ac58213a576ca559220ce7c256cd49a3933c5
3
  size 94765560
checkpoint-291/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c1f039c3b8800766242328eaa5d76f19b0a28b425876e5045d7114823501fe
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9b7310c27516ef517a6c5e8f61fd09550e97bc3995b7435afd54a616db18dd
3
  size 189556666
checkpoint-291/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4fa6a80ced34d80d85c2cda8f22c82979b4cdae444e81fcc24e1cd53d6a4cf2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f29254c7742362677883a21732c9472c2f09e4b6371a56cd238ad172f6f4bc9d
3
  size 1064
checkpoint-291/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8578811369509044,
3
- "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242",
4
  "epoch": 12.0,
5
  "eval_steps": 500,
6
  "global_step": 291,
@@ -10,146 +10,146 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8397932816537468,
86
- "eval_f1": 0.8354840008265724,
87
- "eval_loss": 0.44080850481987,
88
- "eval_precision": 0.8365717854569443,
89
- "eval_recall": 0.8397932816537468,
90
- "eval_runtime": 1.6785,
91
- "eval_samples_per_second": 230.56,
92
- "eval_steps_per_second": 29.192,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8191214470284238,
98
- "eval_f1": 0.8167837576120855,
99
- "eval_loss": 0.46871018409729004,
100
- "eval_precision": 0.8187643627479353,
101
- "eval_recall": 0.8191214470284238,
102
- "eval_runtime": 1.6719,
103
- "eval_samples_per_second": 231.477,
104
- "eval_steps_per_second": 29.308,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
- "eval_accuracy": 0.8475452196382429,
110
- "eval_f1": 0.8473710740005564,
111
- "eval_loss": 0.4363822937011719,
112
- "eval_precision": 0.8483238707679635,
113
- "eval_recall": 0.8475452196382429,
114
- "eval_runtime": 1.6838,
115
- "eval_samples_per_second": 229.835,
116
- "eval_steps_per_second": 29.101,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
- "eval_accuracy": 0.8578811369509044,
122
- "eval_f1": 0.8567532661685897,
123
- "eval_loss": 0.42906680703163147,
124
- "eval_precision": 0.8560504853170988,
125
- "eval_recall": 0.8578811369509044,
126
- "eval_runtime": 1.6875,
127
- "eval_samples_per_second": 229.333,
128
- "eval_steps_per_second": 29.037,
129
  "step": 242
130
  },
131
  {
132
  "epoch": 10.969072164948454,
133
- "eval_accuracy": 0.8501291989664083,
134
- "eval_f1": 0.8527674409653727,
135
- "eval_loss": 0.46987947821617126,
136
- "eval_precision": 0.858217159873796,
137
- "eval_recall": 0.8501291989664083,
138
- "eval_runtime": 1.6795,
139
- "eval_samples_per_second": 230.425,
140
- "eval_steps_per_second": 29.175,
141
  "step": 266
142
  },
143
  {
144
  "epoch": 12.0,
145
  "eval_accuracy": 0.8449612403100775,
146
- "eval_f1": 0.8479775479649662,
147
- "eval_loss": 0.48616188764572144,
148
- "eval_precision": 0.8536273463382107,
149
  "eval_recall": 0.8449612403100775,
150
- "eval_runtime": 1.6948,
151
- "eval_samples_per_second": 228.339,
152
- "eval_steps_per_second": 28.911,
153
  "step": 291
154
  }
155
  ],
 
1
  {
2
+ "best_metric": 0.8552971576227391,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-266",
4
  "epoch": 12.0,
5
  "eval_steps": 500,
6
  "global_step": 291,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.813953488372093,
98
+ "eval_f1": 0.8191057475344072,
99
+ "eval_loss": 0.461563378572464,
100
+ "eval_precision": 0.8344562802960127,
101
+ "eval_recall": 0.813953488372093,
102
+ "eval_runtime": 1.6538,
103
+ "eval_samples_per_second": 234.004,
104
+ "eval_steps_per_second": 29.628,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8113695090439277,
110
+ "eval_f1": 0.8021117074140329,
111
+ "eval_loss": 0.5000612735748291,
112
+ "eval_precision": 0.8141611295681063,
113
+ "eval_recall": 0.8113695090439277,
114
+ "eval_runtime": 1.6675,
115
+ "eval_samples_per_second": 232.083,
116
+ "eval_steps_per_second": 29.385,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
+ "eval_accuracy": 0.8165374677002584,
122
+ "eval_f1": 0.8125708918966329,
123
+ "eval_loss": 0.4529660940170288,
124
+ "eval_precision": 0.8130985716009446,
125
+ "eval_recall": 0.8165374677002584,
126
+ "eval_runtime": 1.6635,
127
+ "eval_samples_per_second": 232.639,
128
+ "eval_steps_per_second": 29.456,
129
  "step": 242
130
  },
131
  {
132
  "epoch": 10.969072164948454,
133
+ "eval_accuracy": 0.8552971576227391,
134
+ "eval_f1": 0.8544131646946824,
135
+ "eval_loss": 0.4202619791030884,
136
+ "eval_precision": 0.8585534724896462,
137
+ "eval_recall": 0.8552971576227391,
138
+ "eval_runtime": 1.6427,
139
+ "eval_samples_per_second": 235.585,
140
+ "eval_steps_per_second": 29.829,
141
  "step": 266
142
  },
143
  {
144
  "epoch": 12.0,
145
  "eval_accuracy": 0.8449612403100775,
146
+ "eval_f1": 0.8402394437564343,
147
+ "eval_loss": 0.46208810806274414,
148
+ "eval_precision": 0.8423482544611124,
149
  "eval_recall": 0.8449612403100775,
150
+ "eval_runtime": 1.6537,
151
+ "eval_samples_per_second": 234.014,
152
+ "eval_steps_per_second": 29.63,
153
  "step": 291
154
  }
155
  ],
checkpoint-291/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6f714e9e8c7d28837d7af7be0484ab61be5f65a97f38bd6a3066e5b48907ab
3
  size 5240
checkpoint-315/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1700c80d575b455dce88b4e3c764090727ae02d64f4125396c1001f9bd02bd57
3
  size 94765560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bb78ec1a1f8bf4d1275bf57ac0b3ee703492f522708754f9c23186aadb8ddc6
3
  size 94765560
checkpoint-315/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fca7dfcd62a0d46fcb8d64a5382493902a875243c0bf05e743cbde3de662707e
3
  size 189556666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:960f106a9ac787d0f0ca7217efd249996acde184597425b6324d4dbd2488881b
3
  size 189556666
checkpoint-315/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:956285c3102bd85589a4c94d7acca82629b8e4407131906dba6bfe9c2568df92
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1664f3c8866077a188c06b3bbbe492614b02e317a58b2358028d07588fd98b
3
  size 1064
checkpoint-315/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8578811369509044,
3
- "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242",
4
  "epoch": 12.989690721649485,
5
  "eval_steps": 500,
6
  "global_step": 315,
@@ -10,158 +10,158 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
- "eval_accuracy": 0.6356589147286822,
14
- "eval_f1": 0.5920563016978556,
15
- "eval_loss": 0.980873703956604,
16
- "eval_precision": 0.5920482291587493,
17
- "eval_recall": 0.6356589147286822,
18
- "eval_runtime": 1.6668,
19
- "eval_samples_per_second": 232.175,
20
- "eval_steps_per_second": 29.397,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
- "eval_accuracy": 0.7157622739018088,
26
- "eval_f1": 0.6905410405322238,
27
- "eval_loss": 0.7444477081298828,
28
- "eval_precision": 0.6992377248989063,
29
- "eval_recall": 0.7157622739018088,
30
- "eval_runtime": 1.6941,
31
- "eval_samples_per_second": 228.443,
32
- "eval_steps_per_second": 28.924,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
- "eval_accuracy": 0.7493540051679587,
38
- "eval_f1": 0.744898505571463,
39
- "eval_loss": 0.6171658039093018,
40
- "eval_precision": 0.7437592422989429,
41
- "eval_recall": 0.7493540051679587,
42
- "eval_runtime": 1.6943,
43
- "eval_samples_per_second": 228.408,
44
- "eval_steps_per_second": 28.92,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
- "eval_accuracy": 0.7984496124031008,
50
- "eval_f1": 0.7873621619744228,
51
- "eval_loss": 0.5430988073348999,
52
- "eval_precision": 0.79180344284319,
53
- "eval_recall": 0.7984496124031008,
54
- "eval_runtime": 1.7027,
55
- "eval_samples_per_second": 227.289,
56
- "eval_steps_per_second": 28.778,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
- "eval_accuracy": 0.8010335917312662,
62
- "eval_f1": 0.7974946178390901,
63
- "eval_loss": 0.5268548130989075,
64
- "eval_precision": 0.8005965453214461,
65
- "eval_recall": 0.8010335917312662,
66
- "eval_runtime": 1.6829,
67
- "eval_samples_per_second": 229.957,
68
- "eval_steps_per_second": 29.116,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
- "eval_accuracy": 0.7493540051679587,
74
- "eval_f1": 0.7551329793037762,
75
- "eval_loss": 0.5810549259185791,
76
- "eval_precision": 0.7802262423287315,
77
- "eval_recall": 0.7493540051679587,
78
- "eval_runtime": 1.6999,
79
- "eval_samples_per_second": 227.658,
80
- "eval_steps_per_second": 28.825,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
- "eval_accuracy": 0.8397932816537468,
86
- "eval_f1": 0.8354840008265724,
87
- "eval_loss": 0.44080850481987,
88
- "eval_precision": 0.8365717854569443,
89
- "eval_recall": 0.8397932816537468,
90
- "eval_runtime": 1.6785,
91
- "eval_samples_per_second": 230.56,
92
- "eval_steps_per_second": 29.192,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8191214470284238,
98
- "eval_f1": 0.8167837576120855,
99
- "eval_loss": 0.46871018409729004,
100
- "eval_precision": 0.8187643627479353,
101
- "eval_recall": 0.8191214470284238,
102
- "eval_runtime": 1.6719,
103
- "eval_samples_per_second": 231.477,
104
- "eval_steps_per_second": 29.308,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
- "eval_accuracy": 0.8475452196382429,
110
- "eval_f1": 0.8473710740005564,
111
- "eval_loss": 0.4363822937011719,
112
- "eval_precision": 0.8483238707679635,
113
- "eval_recall": 0.8475452196382429,
114
- "eval_runtime": 1.6838,
115
- "eval_samples_per_second": 229.835,
116
- "eval_steps_per_second": 29.101,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
- "eval_accuracy": 0.8578811369509044,
122
- "eval_f1": 0.8567532661685897,
123
- "eval_loss": 0.42906680703163147,
124
- "eval_precision": 0.8560504853170988,
125
- "eval_recall": 0.8578811369509044,
126
- "eval_runtime": 1.6875,
127
- "eval_samples_per_second": 229.333,
128
- "eval_steps_per_second": 29.037,
129
  "step": 242
130
  },
131
  {
132
  "epoch": 10.969072164948454,
133
- "eval_accuracy": 0.8501291989664083,
134
- "eval_f1": 0.8527674409653727,
135
- "eval_loss": 0.46987947821617126,
136
- "eval_precision": 0.858217159873796,
137
- "eval_recall": 0.8501291989664083,
138
- "eval_runtime": 1.6795,
139
- "eval_samples_per_second": 230.425,
140
- "eval_steps_per_second": 29.175,
141
  "step": 266
142
  },
143
  {
144
  "epoch": 12.0,
145
  "eval_accuracy": 0.8449612403100775,
146
- "eval_f1": 0.8479775479649662,
147
- "eval_loss": 0.48616188764572144,
148
- "eval_precision": 0.8536273463382107,
149
  "eval_recall": 0.8449612403100775,
150
- "eval_runtime": 1.6948,
151
- "eval_samples_per_second": 228.339,
152
- "eval_steps_per_second": 28.911,
153
  "step": 291
154
  },
155
  {
156
  "epoch": 12.989690721649485,
157
- "eval_accuracy": 0.8475452196382429,
158
- "eval_f1": 0.8483528512434715,
159
- "eval_loss": 0.4765341877937317,
160
- "eval_precision": 0.8497431189750078,
161
- "eval_recall": 0.8475452196382429,
162
- "eval_runtime": 1.688,
163
- "eval_samples_per_second": 229.264,
164
- "eval_steps_per_second": 29.028,
165
  "step": 315
166
  }
167
  ],
@@ -186,7 +186,7 @@
186
  "should_evaluate": false,
187
  "should_log": false,
188
  "should_save": true,
189
- "should_training_stop": true
190
  },
191
  "attributes": {}
192
  }
 
1
  {
2
+ "best_metric": 0.8552971576227391,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-266",
4
  "epoch": 12.989690721649485,
5
  "eval_steps": 500,
6
  "global_step": 315,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9896907216494846,
13
+ "eval_accuracy": 0.43410852713178294,
14
+ "eval_f1": 0.2628116488581605,
15
+ "eval_loss": 1.305585265159607,
16
+ "eval_precision": 0.18845021332852593,
17
+ "eval_recall": 0.43410852713178294,
18
+ "eval_runtime": 1.6799,
19
+ "eval_samples_per_second": 230.371,
20
+ "eval_steps_per_second": 29.168,
21
  "step": 24
22
  },
23
  {
24
  "epoch": 1.9793814432989691,
25
+ "eval_accuracy": 0.43410852713178294,
26
+ "eval_f1": 0.2628116488581605,
27
+ "eval_loss": 1.1732141971588135,
28
+ "eval_precision": 0.18845021332852593,
29
+ "eval_recall": 0.43410852713178294,
30
+ "eval_runtime": 1.6247,
31
+ "eval_samples_per_second": 238.193,
32
+ "eval_steps_per_second": 30.159,
33
  "step": 48
34
  },
35
  {
36
  "epoch": 2.9690721649484537,
37
+ "eval_accuracy": 0.6356589147286822,
38
+ "eval_f1": 0.5935030180725367,
39
+ "eval_loss": 0.9256265163421631,
40
+ "eval_precision": 0.6651105917100348,
41
+ "eval_recall": 0.6356589147286822,
42
+ "eval_runtime": 1.639,
43
+ "eval_samples_per_second": 236.125,
44
+ "eval_steps_per_second": 29.897,
45
  "step": 72
46
  },
47
  {
48
  "epoch": 4.0,
49
+ "eval_accuracy": 0.6563307493540051,
50
+ "eval_f1": 0.6387188198872827,
51
+ "eval_loss": 0.787194550037384,
52
+ "eval_precision": 0.6724057806653244,
53
+ "eval_recall": 0.6563307493540051,
54
+ "eval_runtime": 1.6513,
55
+ "eval_samples_per_second": 234.36,
56
+ "eval_steps_per_second": 29.673,
57
  "step": 97
58
  },
59
  {
60
  "epoch": 4.989690721649485,
61
+ "eval_accuracy": 0.7596899224806202,
62
+ "eval_f1": 0.7447530111625,
63
+ "eval_loss": 0.6241620182991028,
64
+ "eval_precision": 0.7615023606202671,
65
+ "eval_recall": 0.7596899224806202,
66
+ "eval_runtime": 1.6695,
67
+ "eval_samples_per_second": 231.805,
68
+ "eval_steps_per_second": 29.35,
69
  "step": 121
70
  },
71
  {
72
  "epoch": 5.979381443298969,
73
+ "eval_accuracy": 0.772609819121447,
74
+ "eval_f1": 0.7744367108711326,
75
+ "eval_loss": 0.5990303158760071,
76
+ "eval_precision": 0.8035211915446686,
77
+ "eval_recall": 0.772609819121447,
78
+ "eval_runtime": 1.7017,
79
+ "eval_samples_per_second": 227.425,
80
+ "eval_steps_per_second": 28.795,
81
  "step": 145
82
  },
83
  {
84
  "epoch": 6.969072164948454,
85
+ "eval_accuracy": 0.7906976744186046,
86
+ "eval_f1": 0.7889441910896717,
87
+ "eval_loss": 0.5285641551017761,
88
+ "eval_precision": 0.8075499035137905,
89
+ "eval_recall": 0.7906976744186046,
90
+ "eval_runtime": 1.6621,
91
+ "eval_samples_per_second": 232.838,
92
+ "eval_steps_per_second": 29.481,
93
  "step": 169
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.813953488372093,
98
+ "eval_f1": 0.8191057475344072,
99
+ "eval_loss": 0.461563378572464,
100
+ "eval_precision": 0.8344562802960127,
101
+ "eval_recall": 0.813953488372093,
102
+ "eval_runtime": 1.6538,
103
+ "eval_samples_per_second": 234.004,
104
+ "eval_steps_per_second": 29.628,
105
  "step": 194
106
  },
107
  {
108
  "epoch": 8.989690721649485,
109
+ "eval_accuracy": 0.8113695090439277,
110
+ "eval_f1": 0.8021117074140329,
111
+ "eval_loss": 0.5000612735748291,
112
+ "eval_precision": 0.8141611295681063,
113
+ "eval_recall": 0.8113695090439277,
114
+ "eval_runtime": 1.6675,
115
+ "eval_samples_per_second": 232.083,
116
+ "eval_steps_per_second": 29.385,
117
  "step": 218
118
  },
119
  {
120
  "epoch": 9.97938144329897,
121
+ "eval_accuracy": 0.8165374677002584,
122
+ "eval_f1": 0.8125708918966329,
123
+ "eval_loss": 0.4529660940170288,
124
+ "eval_precision": 0.8130985716009446,
125
+ "eval_recall": 0.8165374677002584,
126
+ "eval_runtime": 1.6635,
127
+ "eval_samples_per_second": 232.639,
128
+ "eval_steps_per_second": 29.456,
129
  "step": 242
130
  },
131
  {
132
  "epoch": 10.969072164948454,
133
+ "eval_accuracy": 0.8552971576227391,
134
+ "eval_f1": 0.8544131646946824,
135
+ "eval_loss": 0.4202619791030884,
136
+ "eval_precision": 0.8585534724896462,
137
+ "eval_recall": 0.8552971576227391,
138
+ "eval_runtime": 1.6427,
139
+ "eval_samples_per_second": 235.585,
140
+ "eval_steps_per_second": 29.829,
141
  "step": 266
142
  },
143
  {
144
  "epoch": 12.0,
145
  "eval_accuracy": 0.8449612403100775,
146
+ "eval_f1": 0.8402394437564343,
147
+ "eval_loss": 0.46208810806274414,
148
+ "eval_precision": 0.8423482544611124,
149
  "eval_recall": 0.8449612403100775,
150
+ "eval_runtime": 1.6537,
151
+ "eval_samples_per_second": 234.014,
152
+ "eval_steps_per_second": 29.63,
153
  "step": 291
154
  },
155
  {
156
  "epoch": 12.989690721649485,
157
+ "eval_accuracy": 0.8501291989664083,
158
+ "eval_f1": 0.8470800661185643,
159
+ "eval_loss": 0.4583089351654053,
160
+ "eval_precision": 0.8492839490324154,
161
+ "eval_recall": 0.8501291989664083,
162
+ "eval_runtime": 1.6679,
163
+ "eval_samples_per_second": 232.035,
164
+ "eval_steps_per_second": 29.379,
165
  "step": 315
166
  }
167
  ],
 
186
  "should_evaluate": false,
187
  "should_log": false,
188
  "should_save": true,
189
+ "should_training_stop": false
190
  },
191
  "attributes": {}
192
  }