|
{ |
|
"best_metric": 0.45121951219512196, |
|
"best_model_checkpoint": "wav2vec2-base-berkeley-emotions-decay-f1/checkpoint-2958", |
|
"epoch": 17.0, |
|
"global_step": 2958, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"loss": 2.569, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"loss": 2.567, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.172413793103448e-07, |
|
"loss": 2.568, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.896551724137931e-07, |
|
"loss": 2.5676, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.620689655172414e-07, |
|
"loss": 2.562, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.0344827586206896e-06, |
|
"loss": 2.564, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.206896551724138e-06, |
|
"loss": 2.5549, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.3793103448275862e-06, |
|
"loss": 2.5454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.5517241379310346e-06, |
|
"loss": 2.5459, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7241379310344827e-06, |
|
"loss": 2.5321, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8965517241379312e-06, |
|
"loss": 2.51, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.068965517241379e-06, |
|
"loss": 2.5365, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.241379310344828e-06, |
|
"loss": 2.5137, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.413793103448276e-06, |
|
"loss": 2.4893, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.5862068965517246e-06, |
|
"loss": 2.5016, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 2.4782, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.931034482758621e-06, |
|
"loss": 2.4916, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.21138211382113825, |
|
"eval_loss": 2.459113836288452, |
|
"eval_runtime": 12.4194, |
|
"eval_samples_per_second": 19.808, |
|
"eval_steps_per_second": 9.904, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.103448275862069e-06, |
|
"loss": 2.4868, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.2758620689655175e-06, |
|
"loss": 2.3999, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4482758620689654e-06, |
|
"loss": 2.4493, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.620689655172414e-06, |
|
"loss": 2.3774, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.7931034482758625e-06, |
|
"loss": 2.3841, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.96551724137931e-06, |
|
"loss": 2.3626, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.137931034482758e-06, |
|
"loss": 2.3698, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.310344827586207e-06, |
|
"loss": 2.3962, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.482758620689656e-06, |
|
"loss": 2.3698, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.655172413793104e-06, |
|
"loss": 2.3315, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.827586206896552e-06, |
|
"loss": 2.3425, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.9999999999999996e-06, |
|
"loss": 2.4017, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5.172413793103449e-06, |
|
"loss": 2.2892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.344827586206897e-06, |
|
"loss": 2.3775, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5.517241379310345e-06, |
|
"loss": 2.2589, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5.689655172413793e-06, |
|
"loss": 2.2562, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.862068965517242e-06, |
|
"loss": 2.2591, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.3048780487804878, |
|
"eval_loss": 2.270204782485962, |
|
"eval_runtime": 11.4533, |
|
"eval_samples_per_second": 21.479, |
|
"eval_steps_per_second": 10.739, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.0344827586206896e-06, |
|
"loss": 2.3039, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 2.1748, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 6.379310344827586e-06, |
|
"loss": 2.2222, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 6.551724137931035e-06, |
|
"loss": 2.2989, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 6.724137931034483e-06, |
|
"loss": 2.1511, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.896551724137931e-06, |
|
"loss": 2.2883, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.0689655172413796e-06, |
|
"loss": 2.163, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.241379310344828e-06, |
|
"loss": 2.1922, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.413793103448276e-06, |
|
"loss": 2.216, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.586206896551725e-06, |
|
"loss": 2.2721, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.758620689655173e-06, |
|
"loss": 2.2441, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.93103448275862e-06, |
|
"loss": 2.2579, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.103448275862069e-06, |
|
"loss": 2.1499, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.275862068965517e-06, |
|
"loss": 2.3075, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.448275862068966e-06, |
|
"loss": 2.1775, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.620689655172414e-06, |
|
"loss": 2.2148, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.793103448275862e-06, |
|
"loss": 2.1458, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.965517241379312e-06, |
|
"loss": 2.1159, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.2845528455284553, |
|
"eval_loss": 2.224947929382324, |
|
"eval_runtime": 11.2398, |
|
"eval_samples_per_second": 21.887, |
|
"eval_steps_per_second": 10.943, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 9.13793103448276e-06, |
|
"loss": 2.0612, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.310344827586207e-06, |
|
"loss": 2.1366, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 9.482758620689655e-06, |
|
"loss": 2.0171, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 9.655172413793103e-06, |
|
"loss": 2.1402, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 9.827586206896551e-06, |
|
"loss": 2.1011, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 2.1832, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.0172413793103447e-05, |
|
"loss": 2.1226, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.0344827586206898e-05, |
|
"loss": 2.0552, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0517241379310346e-05, |
|
"loss": 2.0428, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.0689655172413794e-05, |
|
"loss": 2.119, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.0862068965517242e-05, |
|
"loss": 2.1665, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.103448275862069e-05, |
|
"loss": 2.0697, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.1206896551724138e-05, |
|
"loss": 2.0581, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.1379310344827586e-05, |
|
"loss": 2.2953, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.1551724137931034e-05, |
|
"loss": 2.09, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.1724137931034483e-05, |
|
"loss": 1.9762, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.1896551724137931e-05, |
|
"loss": 2.1636, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.3170731707317073, |
|
"eval_loss": 2.092029571533203, |
|
"eval_runtime": 11.84, |
|
"eval_samples_per_second": 20.777, |
|
"eval_steps_per_second": 10.388, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.2068965517241379e-05, |
|
"loss": 2.0153, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.2241379310344829e-05, |
|
"loss": 2.0732, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.2413793103448277e-05, |
|
"loss": 1.9913, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.2586206896551725e-05, |
|
"loss": 2.0738, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.2758620689655172e-05, |
|
"loss": 1.9731, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.293103448275862e-05, |
|
"loss": 2.1501, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.310344827586207e-05, |
|
"loss": 2.0999, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.3275862068965518e-05, |
|
"loss": 2.0082, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.3448275862068966e-05, |
|
"loss": 2.0015, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.3620689655172414e-05, |
|
"loss": 2.0501, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.3793103448275862e-05, |
|
"loss": 1.8933, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.396551724137931e-05, |
|
"loss": 2.0852, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.4137931034482759e-05, |
|
"loss": 2.1491, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 1.4310344827586207e-05, |
|
"loss": 2.0248, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.4482758620689657e-05, |
|
"loss": 2.0256, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.4655172413793105e-05, |
|
"loss": 2.1742, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.4827586206896552e-05, |
|
"loss": 1.9253, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.0857, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.3089430894308943, |
|
"eval_loss": 2.0949580669403076, |
|
"eval_runtime": 12.0187, |
|
"eval_samples_per_second": 20.468, |
|
"eval_steps_per_second": 10.234, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 1.517241379310345e-05, |
|
"loss": 1.9912, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.5344827586206898e-05, |
|
"loss": 1.8998, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 1.5517241379310346e-05, |
|
"loss": 1.8834, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 1.5689655172413794e-05, |
|
"loss": 1.8255, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 1.586206896551724e-05, |
|
"loss": 2.0895, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 1.603448275862069e-05, |
|
"loss": 1.9981, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 1.6206896551724137e-05, |
|
"loss": 1.9582, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.637931034482759e-05, |
|
"loss": 2.05, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 1.6551724137931033e-05, |
|
"loss": 1.9791, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 1.6724137931034485e-05, |
|
"loss": 2.0748, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 1.6896551724137932e-05, |
|
"loss": 1.9022, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 1.706896551724138e-05, |
|
"loss": 1.9849, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 1.9502, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.7413793103448276e-05, |
|
"loss": 1.8595, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 1.7586206896551724e-05, |
|
"loss": 1.8499, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 1.7758620689655172e-05, |
|
"loss": 1.8505, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 1.7931034482758623e-05, |
|
"loss": 2.0275, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.3252032520325203, |
|
"eval_loss": 2.106571912765503, |
|
"eval_runtime": 11.9924, |
|
"eval_samples_per_second": 20.513, |
|
"eval_steps_per_second": 10.257, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.8103448275862068e-05, |
|
"loss": 2.0014, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.827586206896552e-05, |
|
"loss": 2.2037, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.8448275862068964e-05, |
|
"loss": 1.9573, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 1.8620689655172415e-05, |
|
"loss": 1.8635, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.8793103448275863e-05, |
|
"loss": 1.9216, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.896551724137931e-05, |
|
"loss": 1.7776, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 1.9137931034482762e-05, |
|
"loss": 2.0595, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 1.9310344827586207e-05, |
|
"loss": 1.8406, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 1.9482758620689658e-05, |
|
"loss": 1.8169, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.9655172413793102e-05, |
|
"loss": 1.8881, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 1.9827586206896554e-05, |
|
"loss": 1.64, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 1.9238, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.017241379310345e-05, |
|
"loss": 2.0383, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 2.0344827586206894e-05, |
|
"loss": 1.9494, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 2.0517241379310345e-05, |
|
"loss": 1.9281, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 2.0689655172413797e-05, |
|
"loss": 1.9734, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 2.086206896551724e-05, |
|
"loss": 1.8332, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.3821138211382114, |
|
"eval_loss": 1.9587126970291138, |
|
"eval_runtime": 11.5, |
|
"eval_samples_per_second": 21.391, |
|
"eval_steps_per_second": 10.696, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 2.1034482758620692e-05, |
|
"loss": 1.9479, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 2.1206896551724137e-05, |
|
"loss": 1.8237, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 2.137931034482759e-05, |
|
"loss": 1.8681, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 2.1551724137931033e-05, |
|
"loss": 1.8791, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 2.1724137931034484e-05, |
|
"loss": 1.8906, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 2.1896551724137932e-05, |
|
"loss": 2.0289, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 2.206896551724138e-05, |
|
"loss": 1.8865, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 2.2241379310344828e-05, |
|
"loss": 1.8217, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 2.2413793103448276e-05, |
|
"loss": 1.7755, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 2.2586206896551727e-05, |
|
"loss": 1.6638, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 2.275862068965517e-05, |
|
"loss": 1.7748, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 2.2931034482758623e-05, |
|
"loss": 1.8823, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 2.3103448275862067e-05, |
|
"loss": 1.6694, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 2.327586206896552e-05, |
|
"loss": 1.9103, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 2.3448275862068967e-05, |
|
"loss": 2.0666, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 2.3620689655172415e-05, |
|
"loss": 1.6608, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 2.3793103448275862e-05, |
|
"loss": 1.8385, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 2.396551724137931e-05, |
|
"loss": 2.0224, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.32926829268292684, |
|
"eval_loss": 2.060194730758667, |
|
"eval_runtime": 12.5242, |
|
"eval_samples_per_second": 19.642, |
|
"eval_steps_per_second": 9.821, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 2.4137931034482758e-05, |
|
"loss": 1.831, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 2.4310344827586206e-05, |
|
"loss": 1.7572, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 2.4482758620689657e-05, |
|
"loss": 1.8719, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 2.4655172413793105e-05, |
|
"loss": 1.8331, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 2.4827586206896553e-05, |
|
"loss": 1.9726, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.7639, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 2.517241379310345e-05, |
|
"loss": 1.9096, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 2.5344827586206897e-05, |
|
"loss": 1.8732, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 2.5517241379310345e-05, |
|
"loss": 1.5993, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 2.5689655172413793e-05, |
|
"loss": 1.7585, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 2.586206896551724e-05, |
|
"loss": 1.8841, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 2.603448275862069e-05, |
|
"loss": 1.5775, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 2.620689655172414e-05, |
|
"loss": 1.7454, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.6379310344827588e-05, |
|
"loss": 1.9249, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 2.6551724137931036e-05, |
|
"loss": 1.8351, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 2.6724137931034484e-05, |
|
"loss": 1.6522, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 2.689655172413793e-05, |
|
"loss": 1.8086, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.32926829268292684, |
|
"eval_loss": 2.068312406539917, |
|
"eval_runtime": 12.1374, |
|
"eval_samples_per_second": 20.268, |
|
"eval_steps_per_second": 10.134, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 2.706896551724138e-05, |
|
"loss": 1.7357, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 2.7241379310344827e-05, |
|
"loss": 1.6286, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 2.741379310344828e-05, |
|
"loss": 1.6207, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 2.7586206896551723e-05, |
|
"loss": 1.6797, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 2.7758620689655175e-05, |
|
"loss": 1.6136, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 2.793103448275862e-05, |
|
"loss": 1.5962, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 2.810344827586207e-05, |
|
"loss": 1.8679, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.8275862068965518e-05, |
|
"loss": 1.9907, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.8448275862068966e-05, |
|
"loss": 1.8782, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.8620689655172414e-05, |
|
"loss": 1.6307, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 2.8793103448275862e-05, |
|
"loss": 1.7819, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 2.8965517241379313e-05, |
|
"loss": 1.9863, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 2.9137931034482758e-05, |
|
"loss": 1.8648, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.931034482758621e-05, |
|
"loss": 1.8474, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 2.9482758620689654e-05, |
|
"loss": 1.9404, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 2.9655172413793105e-05, |
|
"loss": 1.9395, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 2.982758620689655e-05, |
|
"loss": 1.6639, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8941, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.34552845528455284, |
|
"eval_loss": 2.014744997024536, |
|
"eval_runtime": 12.1182, |
|
"eval_samples_per_second": 20.3, |
|
"eval_steps_per_second": 10.15, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 2.9980842911877396e-05, |
|
"loss": 1.6203, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 2.9961685823754788e-05, |
|
"loss": 1.6125, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 2.9942528735632184e-05, |
|
"loss": 1.5883, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 2.992337164750958e-05, |
|
"loss": 1.658, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 2.9904214559386975e-05, |
|
"loss": 1.5954, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 2.9885057471264367e-05, |
|
"loss": 1.7201, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 2.9865900383141762e-05, |
|
"loss": 1.5687, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 2.9846743295019157e-05, |
|
"loss": 1.7798, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 2.982758620689655e-05, |
|
"loss": 1.684, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 2.9808429118773948e-05, |
|
"loss": 1.6633, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 2.9789272030651344e-05, |
|
"loss": 1.8764, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 2.977011494252874e-05, |
|
"loss": 1.7563, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 2.975095785440613e-05, |
|
"loss": 1.7403, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 2.9731800766283527e-05, |
|
"loss": 1.5366, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 2.9712643678160922e-05, |
|
"loss": 1.6187, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 2.9693486590038314e-05, |
|
"loss": 1.8297, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 2.967432950191571e-05, |
|
"loss": 1.7848, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.4268292682926829, |
|
"eval_loss": 1.944414734840393, |
|
"eval_runtime": 12.1026, |
|
"eval_samples_per_second": 20.326, |
|
"eval_steps_per_second": 10.163, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 2.9655172413793105e-05, |
|
"loss": 1.6721, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 2.96360153256705e-05, |
|
"loss": 1.3034, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 2.9616858237547892e-05, |
|
"loss": 1.6881, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 2.9597701149425288e-05, |
|
"loss": 1.4882, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 2.9578544061302683e-05, |
|
"loss": 1.4722, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 2.9559386973180075e-05, |
|
"loss": 1.5784, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 2.954022988505747e-05, |
|
"loss": 1.5223, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 2.9521072796934866e-05, |
|
"loss": 1.6843, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 2.950191570881226e-05, |
|
"loss": 1.6447, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 2.9482758620689654e-05, |
|
"loss": 1.6076, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 2.946360153256705e-05, |
|
"loss": 1.6644, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 2.9444444444444445e-05, |
|
"loss": 1.7464, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 2.942528735632184e-05, |
|
"loss": 1.5289, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 2.9406130268199235e-05, |
|
"loss": 1.6279, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 2.938697318007663e-05, |
|
"loss": 1.5911, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 2.9367816091954026e-05, |
|
"loss": 1.6629, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 2.9348659003831418e-05, |
|
"loss": 1.4013, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.36585365853658536, |
|
"eval_loss": 1.9624298810958862, |
|
"eval_runtime": 11.265, |
|
"eval_samples_per_second": 21.838, |
|
"eval_steps_per_second": 10.919, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 2.9329501915708814e-05, |
|
"loss": 1.4498, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 2.931034482758621e-05, |
|
"loss": 1.3999, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 2.92911877394636e-05, |
|
"loss": 1.3161, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 2.9272030651340997e-05, |
|
"loss": 1.3258, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 2.9252873563218392e-05, |
|
"loss": 1.3642, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 2.9233716475095787e-05, |
|
"loss": 1.5188, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 2.921455938697318e-05, |
|
"loss": 1.6076, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 2.9195402298850575e-05, |
|
"loss": 1.6371, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 2.917624521072797e-05, |
|
"loss": 1.4424, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 2.9157088122605362e-05, |
|
"loss": 1.3859, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 2.9137931034482758e-05, |
|
"loss": 1.4307, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 2.9118773946360153e-05, |
|
"loss": 1.5026, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 2.909961685823755e-05, |
|
"loss": 1.6566, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 2.908045977011494e-05, |
|
"loss": 1.6411, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 2.9061302681992336e-05, |
|
"loss": 1.574, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 2.9042145593869735e-05, |
|
"loss": 1.486, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 2.9022988505747127e-05, |
|
"loss": 1.6049, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 2.9003831417624522e-05, |
|
"loss": 1.3337, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.4227642276422765, |
|
"eval_loss": 1.7803982496261597, |
|
"eval_runtime": 11.4307, |
|
"eval_samples_per_second": 21.521, |
|
"eval_steps_per_second": 10.76, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 2.8984674329501918e-05, |
|
"loss": 1.4821, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 2.8965517241379313e-05, |
|
"loss": 1.4081, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 2.8946360153256705e-05, |
|
"loss": 1.3393, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 2.89272030651341e-05, |
|
"loss": 1.3755, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 2.8908045977011496e-05, |
|
"loss": 1.3658, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 1.0842, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 2.8869731800766284e-05, |
|
"loss": 1.3182, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 2.885057471264368e-05, |
|
"loss": 1.4022, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 2.8831417624521075e-05, |
|
"loss": 1.3685, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 2.8812260536398467e-05, |
|
"loss": 1.488, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 2.8793103448275862e-05, |
|
"loss": 1.224, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 2.8773946360153257e-05, |
|
"loss": 1.3765, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 2.875478927203065e-05, |
|
"loss": 1.3204, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 2.8735632183908045e-05, |
|
"loss": 1.2625, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 2.871647509578544e-05, |
|
"loss": 1.2118, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 2.8697318007662836e-05, |
|
"loss": 1.4499, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 2.8678160919540228e-05, |
|
"loss": 1.3977, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.3699186991869919, |
|
"eval_loss": 1.8904528617858887, |
|
"eval_runtime": 11.8691, |
|
"eval_samples_per_second": 20.726, |
|
"eval_steps_per_second": 10.363, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.8659003831417627e-05, |
|
"loss": 1.6674, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 2.8639846743295022e-05, |
|
"loss": 1.2464, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 2.8620689655172414e-05, |
|
"loss": 1.2784, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 2.860153256704981e-05, |
|
"loss": 1.4698, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 2.8582375478927205e-05, |
|
"loss": 1.4046, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 2.85632183908046e-05, |
|
"loss": 1.0906, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 2.8544061302681992e-05, |
|
"loss": 1.3381, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 2.8524904214559388e-05, |
|
"loss": 1.1792, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.8505747126436783e-05, |
|
"loss": 1.2152, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 2.848659003831418e-05, |
|
"loss": 1.4155, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 2.846743295019157e-05, |
|
"loss": 1.1157, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 2.8448275862068966e-05, |
|
"loss": 1.2993, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 2.842911877394636e-05, |
|
"loss": 1.2198, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 2.8409961685823754e-05, |
|
"loss": 1.1076, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 2.839080459770115e-05, |
|
"loss": 1.1955, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 2.8371647509578545e-05, |
|
"loss": 1.2308, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 2.835249042145594e-05, |
|
"loss": 1.3558, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.8333333333333332e-05, |
|
"loss": 1.3117, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.4146341463414634, |
|
"eval_loss": 1.8130720853805542, |
|
"eval_runtime": 12.5653, |
|
"eval_samples_per_second": 19.578, |
|
"eval_steps_per_second": 9.789, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 2.8314176245210727e-05, |
|
"loss": 1.1162, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 2.8295019157088123e-05, |
|
"loss": 1.1019, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 2.8275862068965518e-05, |
|
"loss": 1.1809, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 2.8256704980842914e-05, |
|
"loss": 1.2734, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 2.823754789272031e-05, |
|
"loss": 1.0834, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 2.8218390804597705e-05, |
|
"loss": 1.173, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 2.8199233716475097e-05, |
|
"loss": 1.2238, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 2.8180076628352492e-05, |
|
"loss": 1.2348, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 2.8160919540229887e-05, |
|
"loss": 0.9774, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 2.814176245210728e-05, |
|
"loss": 1.1478, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 2.8122605363984675e-05, |
|
"loss": 1.2591, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 2.810344827586207e-05, |
|
"loss": 1.1669, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 2.8084291187739466e-05, |
|
"loss": 1.2834, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 2.8065134099616858e-05, |
|
"loss": 1.2256, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 2.8045977011494253e-05, |
|
"loss": 1.3441, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 2.802681992337165e-05, |
|
"loss": 1.1368, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 2.800766283524904e-05, |
|
"loss": 1.0474, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.44308943089430897, |
|
"eval_loss": 1.9097241163253784, |
|
"eval_runtime": 11.5235, |
|
"eval_samples_per_second": 21.348, |
|
"eval_steps_per_second": 10.674, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 2.7988505747126436e-05, |
|
"loss": 1.2468, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 2.796934865900383e-05, |
|
"loss": 1.2014, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 2.7950191570881227e-05, |
|
"loss": 1.0133, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 2.793103448275862e-05, |
|
"loss": 1.0917, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 2.7911877394636014e-05, |
|
"loss": 1.2575, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 2.7892720306513413e-05, |
|
"loss": 1.1548, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 2.7873563218390805e-05, |
|
"loss": 0.9297, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 2.78544061302682e-05, |
|
"loss": 1.0547, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 2.7835249042145596e-05, |
|
"loss": 0.924, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 2.781609195402299e-05, |
|
"loss": 1.0336, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 2.7796934865900384e-05, |
|
"loss": 0.9839, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.8042, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 2.7758620689655175e-05, |
|
"loss": 1.0443, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 2.7739463601532567e-05, |
|
"loss": 1.0883, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 2.7720306513409962e-05, |
|
"loss": 1.0444, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 2.7701149425287357e-05, |
|
"loss": 1.1755, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 2.7681992337164753e-05, |
|
"loss": 0.8636, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.45121951219512196, |
|
"eval_loss": 1.9556288719177246, |
|
"eval_runtime": 11.4652, |
|
"eval_samples_per_second": 21.456, |
|
"eval_steps_per_second": 10.728, |
|
"step": 2958 |
|
} |
|
], |
|
"max_steps": 17400, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.0921876643966929e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|