{ "best_metric": 0.8478260869565217, "best_model_checkpoint": "BEiT-DMAE-13XDA-REVAL-80-32\\checkpoint-1144", "epoch": 79.66804979253112, "eval_steps": 500, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 7.291666666666665e-07, "loss": 1.6222, "step": 10 }, { "epoch": 0.33, "learning_rate": 1.458333333333333e-06, "loss": 1.6488, "step": 20 }, { "epoch": 0.5, "learning_rate": 2.1875e-06, "loss": 1.611, "step": 30 }, { "epoch": 0.66, "learning_rate": 2.916666666666666e-06, "loss": 1.6356, "step": 40 }, { "epoch": 0.83, "learning_rate": 3.6458333333333333e-06, "loss": 1.602, "step": 50 }, { "epoch": 1.0, "learning_rate": 4.375e-06, "loss": 1.5474, "step": 60 }, { "epoch": 1.0, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.2944936752319336, "eval_runtime": 0.6632, "eval_samples_per_second": 69.361, "eval_steps_per_second": 4.524, "step": 60 }, { "epoch": 1.16, "learning_rate": 5.104166666666667e-06, "loss": 1.561, "step": 70 }, { "epoch": 1.33, "learning_rate": 5.833333333333332e-06, "loss": 1.4872, "step": 80 }, { "epoch": 1.49, "learning_rate": 6.5624999999999994e-06, "loss": 1.458, "step": 90 }, { "epoch": 1.66, "learning_rate": 7.291666666666667e-06, "loss": 1.4426, "step": 100 }, { "epoch": 1.83, "learning_rate": 8.020833333333333e-06, "loss": 1.4296, "step": 110 }, { "epoch": 1.99, "learning_rate": 8.75e-06, "loss": 1.3959, "step": 120 }, { "epoch": 1.99, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.2745169401168823, "eval_runtime": 0.6327, "eval_samples_per_second": 72.708, "eval_steps_per_second": 4.742, "step": 120 }, { "epoch": 2.16, "learning_rate": 9.479166666666666e-06, "loss": 1.3373, "step": 130 }, { "epoch": 2.32, "learning_rate": 1.0208333333333334e-05, "loss": 1.2946, "step": 140 }, { "epoch": 2.49, "learning_rate": 1.0937499999999998e-05, "loss": 1.236, "step": 150 }, { "epoch": 2.66, "learning_rate": 1.1666666666666665e-05, "loss": 1.1899, "step": 160 }, { "epoch": 2.82, "learning_rate": 1.2395833333333333e-05, "loss": 1.1186, "step": 170 }, { "epoch": 2.99, "learning_rate": 1.3124999999999999e-05, "loss": 1.0517, "step": 180 }, { "epoch": 2.99, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9631962180137634, "eval_runtime": 0.6872, "eval_samples_per_second": 66.943, "eval_steps_per_second": 4.366, "step": 180 }, { "epoch": 3.15, "learning_rate": 1.3854166666666665e-05, "loss": 0.9828, "step": 190 }, { "epoch": 3.32, "learning_rate": 1.4583333333333333e-05, "loss": 0.9259, "step": 200 }, { "epoch": 3.49, "learning_rate": 1.53125e-05, "loss": 0.8746, "step": 210 }, { "epoch": 3.65, "learning_rate": 1.6041666666666666e-05, "loss": 0.8467, "step": 220 }, { "epoch": 3.82, "learning_rate": 1.6770833333333332e-05, "loss": 0.7669, "step": 230 }, { "epoch": 3.98, "learning_rate": 1.75e-05, "loss": 0.7273, "step": 240 }, { "epoch": 4.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.7708569169044495, "eval_runtime": 0.6952, "eval_samples_per_second": 66.169, "eval_steps_per_second": 4.315, "step": 241 }, { "epoch": 4.15, "learning_rate": 1.8229166666666665e-05, "loss": 0.6937, "step": 250 }, { "epoch": 4.32, "learning_rate": 1.895833333333333e-05, "loss": 0.6738, "step": 260 }, { "epoch": 4.48, "learning_rate": 1.9687499999999997e-05, "loss": 0.6577, "step": 270 }, { "epoch": 4.65, "learning_rate": 2.0416666666666667e-05, "loss": 0.6679, "step": 280 }, { "epoch": 4.81, "learning_rate": 2.114583333333333e-05, "loss": 0.5962, "step": 290 }, { "epoch": 4.98, "learning_rate": 2.1874999999999996e-05, "loss": 0.5246, "step": 300 }, { "epoch": 5.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.7217094302177429, "eval_runtime": 0.6875, "eval_samples_per_second": 66.909, "eval_steps_per_second": 4.364, "step": 301 }, { "epoch": 5.15, "learning_rate": 2.2604166666666666e-05, "loss": 0.4905, "step": 310 }, { "epoch": 5.31, "learning_rate": 2.333333333333333e-05, "loss": 0.4651, "step": 320 }, { "epoch": 5.48, "learning_rate": 2.40625e-05, "loss": 0.3947, "step": 330 }, { "epoch": 5.64, "learning_rate": 2.4791666666666665e-05, "loss": 0.3819, "step": 340 }, { "epoch": 5.81, "learning_rate": 2.552083333333333e-05, "loss": 0.3977, "step": 350 }, { "epoch": 5.98, "learning_rate": 2.6249999999999998e-05, "loss": 0.3645, "step": 360 }, { "epoch": 5.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7142456769943237, "eval_runtime": 0.6651, "eval_samples_per_second": 69.157, "eval_steps_per_second": 4.51, "step": 361 }, { "epoch": 6.14, "learning_rate": 2.6979166666666664e-05, "loss": 0.3382, "step": 370 }, { "epoch": 6.31, "learning_rate": 2.770833333333333e-05, "loss": 0.2709, "step": 380 }, { "epoch": 6.47, "learning_rate": 2.8437499999999997e-05, "loss": 0.2641, "step": 390 }, { "epoch": 6.64, "learning_rate": 2.9166666666666666e-05, "loss": 0.2518, "step": 400 }, { "epoch": 6.8, "learning_rate": 2.989583333333333e-05, "loss": 0.2631, "step": 410 }, { "epoch": 6.97, "learning_rate": 3.0625e-05, "loss": 0.2211, "step": 420 }, { "epoch": 6.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.6436068415641785, "eval_runtime": 0.6647, "eval_samples_per_second": 69.209, "eval_steps_per_second": 4.514, "step": 421 }, { "epoch": 7.14, "learning_rate": 3.1354166666666665e-05, "loss": 0.225, "step": 430 }, { "epoch": 7.3, "learning_rate": 3.208333333333333e-05, "loss": 0.2625, "step": 440 }, { "epoch": 7.47, "learning_rate": 3.28125e-05, "loss": 0.183, "step": 450 }, { "epoch": 7.63, "learning_rate": 3.3541666666666664e-05, "loss": 0.1866, "step": 460 }, { "epoch": 7.8, "learning_rate": 3.427083333333333e-05, "loss": 0.2547, "step": 470 }, { "epoch": 7.97, "learning_rate": 3.5e-05, "loss": 0.266, "step": 480 }, { "epoch": 8.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 1.1315726041793823, "eval_runtime": 0.6621, "eval_samples_per_second": 69.472, "eval_steps_per_second": 4.531, "step": 482 }, { "epoch": 8.13, "learning_rate": 3.491898148148148e-05, "loss": 0.2918, "step": 490 }, { "epoch": 8.3, "learning_rate": 3.483796296296296e-05, "loss": 0.209, "step": 500 }, { "epoch": 8.46, "learning_rate": 3.475694444444444e-05, "loss": 0.1858, "step": 510 }, { "epoch": 8.63, "learning_rate": 3.467592592592592e-05, "loss": 0.1967, "step": 520 }, { "epoch": 8.8, "learning_rate": 3.459490740740741e-05, "loss": 0.1914, "step": 530 }, { "epoch": 8.96, "learning_rate": 3.4513888888888886e-05, "loss": 0.1235, "step": 540 }, { "epoch": 9.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9256875514984131, "eval_runtime": 0.6634, "eval_samples_per_second": 69.344, "eval_steps_per_second": 4.522, "step": 542 }, { "epoch": 9.13, "learning_rate": 3.4432870370370365e-05, "loss": 0.2088, "step": 550 }, { "epoch": 9.29, "learning_rate": 3.435185185185185e-05, "loss": 0.2034, "step": 560 }, { "epoch": 9.46, "learning_rate": 3.427083333333333e-05, "loss": 0.1289, "step": 570 }, { "epoch": 9.63, "learning_rate": 3.418981481481481e-05, "loss": 0.162, "step": 580 }, { "epoch": 9.79, "learning_rate": 3.4108796296296296e-05, "loss": 0.1831, "step": 590 }, { "epoch": 9.96, "learning_rate": 3.4027777777777775e-05, "loss": 0.1613, "step": 600 }, { "epoch": 9.99, "eval_accuracy": 0.782608695652174, "eval_loss": 0.8526802062988281, "eval_runtime": 0.6424, "eval_samples_per_second": 71.601, "eval_steps_per_second": 4.67, "step": 602 }, { "epoch": 10.12, "learning_rate": 3.3946759259259254e-05, "loss": 0.0869, "step": 610 }, { "epoch": 10.29, "learning_rate": 3.3865740740740734e-05, "loss": 0.1814, "step": 620 }, { "epoch": 10.46, "learning_rate": 3.378472222222222e-05, "loss": 0.1488, "step": 630 }, { "epoch": 10.62, "learning_rate": 3.37037037037037e-05, "loss": 0.1249, "step": 640 }, { "epoch": 10.79, "learning_rate": 3.3622685185185185e-05, "loss": 0.1397, "step": 650 }, { "epoch": 10.95, "learning_rate": 3.3541666666666664e-05, "loss": 0.0946, "step": 660 }, { "epoch": 10.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.8274199366569519, "eval_runtime": 0.8621, "eval_samples_per_second": 53.358, "eval_steps_per_second": 3.48, "step": 662 }, { "epoch": 11.12, "learning_rate": 3.3460648148148144e-05, "loss": 0.1215, "step": 670 }, { "epoch": 11.29, "learning_rate": 3.337962962962963e-05, "loss": 0.1146, "step": 680 }, { "epoch": 11.45, "learning_rate": 3.329861111111111e-05, "loss": 0.0991, "step": 690 }, { "epoch": 11.62, "learning_rate": 3.321759259259259e-05, "loss": 0.1463, "step": 700 }, { "epoch": 11.78, "learning_rate": 3.3136574074074074e-05, "loss": 0.1049, "step": 710 }, { "epoch": 11.95, "learning_rate": 3.3055555555555553e-05, "loss": 0.1392, "step": 720 }, { "epoch": 12.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8311833739280701, "eval_runtime": 0.6987, "eval_samples_per_second": 65.839, "eval_steps_per_second": 4.294, "step": 723 }, { "epoch": 12.12, "learning_rate": 3.297453703703703e-05, "loss": 0.1116, "step": 730 }, { "epoch": 12.28, "learning_rate": 3.289351851851851e-05, "loss": 0.1217, "step": 740 }, { "epoch": 12.45, "learning_rate": 3.28125e-05, "loss": 0.0952, "step": 750 }, { "epoch": 12.61, "learning_rate": 3.273148148148148e-05, "loss": 0.0972, "step": 760 }, { "epoch": 12.78, "learning_rate": 3.2650462962962956e-05, "loss": 0.1193, "step": 770 }, { "epoch": 12.95, "learning_rate": 3.256944444444444e-05, "loss": 0.1028, "step": 780 }, { "epoch": 13.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.1959415674209595, "eval_runtime": 0.6697, "eval_samples_per_second": 68.691, "eval_steps_per_second": 4.48, "step": 783 }, { "epoch": 13.11, "learning_rate": 3.248842592592592e-05, "loss": 0.1014, "step": 790 }, { "epoch": 13.28, "learning_rate": 3.240740740740741e-05, "loss": 0.1055, "step": 800 }, { "epoch": 13.44, "learning_rate": 3.232638888888889e-05, "loss": 0.1317, "step": 810 }, { "epoch": 13.61, "learning_rate": 3.2245370370370366e-05, "loss": 0.0899, "step": 820 }, { "epoch": 13.78, "learning_rate": 3.216435185185185e-05, "loss": 0.1158, "step": 830 }, { "epoch": 13.94, "learning_rate": 3.208333333333333e-05, "loss": 0.1072, "step": 840 }, { "epoch": 13.99, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.0017004013061523, "eval_runtime": 0.6881, "eval_samples_per_second": 66.851, "eval_steps_per_second": 4.36, "step": 843 }, { "epoch": 14.11, "learning_rate": 3.200231481481481e-05, "loss": 0.0812, "step": 850 }, { "epoch": 14.27, "learning_rate": 3.19212962962963e-05, "loss": 0.1025, "step": 860 }, { "epoch": 14.44, "learning_rate": 3.1840277777777776e-05, "loss": 0.1296, "step": 870 }, { "epoch": 14.61, "learning_rate": 3.1759259259259255e-05, "loss": 0.146, "step": 880 }, { "epoch": 14.77, "learning_rate": 3.1678240740740735e-05, "loss": 0.0782, "step": 890 }, { "epoch": 14.94, "learning_rate": 3.159722222222222e-05, "loss": 0.0888, "step": 900 }, { "epoch": 14.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.921440064907074, "eval_runtime": 0.714, "eval_samples_per_second": 64.425, "eval_steps_per_second": 4.202, "step": 903 }, { "epoch": 15.1, "learning_rate": 3.15162037037037e-05, "loss": 0.077, "step": 910 }, { "epoch": 15.27, "learning_rate": 3.143518518518518e-05, "loss": 0.0821, "step": 920 }, { "epoch": 15.44, "learning_rate": 3.1354166666666665e-05, "loss": 0.0692, "step": 930 }, { "epoch": 15.6, "learning_rate": 3.1273148148148145e-05, "loss": 0.0801, "step": 940 }, { "epoch": 15.77, "learning_rate": 3.119212962962963e-05, "loss": 0.1015, "step": 950 }, { "epoch": 15.93, "learning_rate": 3.111111111111111e-05, "loss": 0.0951, "step": 960 }, { "epoch": 16.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.9156137704849243, "eval_runtime": 0.6762, "eval_samples_per_second": 68.028, "eval_steps_per_second": 4.437, "step": 964 }, { "epoch": 16.1, "learning_rate": 3.103009259259259e-05, "loss": 0.0768, "step": 970 }, { "epoch": 16.27, "learning_rate": 3.0949074074074075e-05, "loss": 0.0494, "step": 980 }, { "epoch": 16.43, "learning_rate": 3.0868055555555554e-05, "loss": 0.1444, "step": 990 }, { "epoch": 16.6, "learning_rate": 3.0787037037037034e-05, "loss": 0.086, "step": 1000 }, { "epoch": 16.76, "learning_rate": 3.070601851851851e-05, "loss": 0.0883, "step": 1010 }, { "epoch": 16.93, "learning_rate": 3.0625e-05, "loss": 0.0714, "step": 1020 }, { "epoch": 17.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.3116263151168823, "eval_runtime": 0.6431, "eval_samples_per_second": 71.525, "eval_steps_per_second": 4.665, "step": 1024 }, { "epoch": 17.1, "learning_rate": 3.054398148148148e-05, "loss": 0.0637, "step": 1030 }, { "epoch": 17.26, "learning_rate": 3.046296296296296e-05, "loss": 0.0581, "step": 1040 }, { "epoch": 17.43, "learning_rate": 3.0381944444444444e-05, "loss": 0.1138, "step": 1050 }, { "epoch": 17.59, "learning_rate": 3.0300925925925923e-05, "loss": 0.0894, "step": 1060 }, { "epoch": 17.76, "learning_rate": 3.0219907407407405e-05, "loss": 0.065, "step": 1070 }, { "epoch": 17.93, "learning_rate": 3.0138888888888888e-05, "loss": 0.0804, "step": 1080 }, { "epoch": 17.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.1107388734817505, "eval_runtime": 0.7361, "eval_samples_per_second": 62.487, "eval_steps_per_second": 4.075, "step": 1084 }, { "epoch": 18.09, "learning_rate": 3.0057870370370367e-05, "loss": 0.1195, "step": 1090 }, { "epoch": 18.26, "learning_rate": 2.997685185185185e-05, "loss": 0.117, "step": 1100 }, { "epoch": 18.42, "learning_rate": 2.989583333333333e-05, "loss": 0.0535, "step": 1110 }, { "epoch": 18.59, "learning_rate": 2.9814814814814812e-05, "loss": 0.0778, "step": 1120 }, { "epoch": 18.76, "learning_rate": 2.9733796296296295e-05, "loss": 0.1094, "step": 1130 }, { "epoch": 18.92, "learning_rate": 2.9652777777777774e-05, "loss": 0.08, "step": 1140 }, { "epoch": 18.99, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.8104602694511414, "eval_runtime": 0.9009, "eval_samples_per_second": 51.063, "eval_steps_per_second": 3.33, "step": 1144 }, { "epoch": 19.09, "learning_rate": 2.9571759259259257e-05, "loss": 0.058, "step": 1150 }, { "epoch": 19.25, "learning_rate": 2.9490740740740736e-05, "loss": 0.0811, "step": 1160 }, { "epoch": 19.42, "learning_rate": 2.940972222222222e-05, "loss": 0.0626, "step": 1170 }, { "epoch": 19.59, "learning_rate": 2.9328703703703698e-05, "loss": 0.0502, "step": 1180 }, { "epoch": 19.75, "learning_rate": 2.9247685185185184e-05, "loss": 0.0799, "step": 1190 }, { "epoch": 19.92, "learning_rate": 2.9166666666666666e-05, "loss": 0.1619, "step": 1200 }, { "epoch": 20.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.7581048011779785, "eval_runtime": 0.6451, "eval_samples_per_second": 71.304, "eval_steps_per_second": 4.65, "step": 1205 }, { "epoch": 20.08, "learning_rate": 2.9085648148148146e-05, "loss": 0.0787, "step": 1210 }, { "epoch": 20.25, "learning_rate": 2.9004629629629628e-05, "loss": 0.0764, "step": 1220 }, { "epoch": 20.41, "learning_rate": 2.8923611111111108e-05, "loss": 0.0618, "step": 1230 }, { "epoch": 20.58, "learning_rate": 2.884259259259259e-05, "loss": 0.0612, "step": 1240 }, { "epoch": 20.75, "learning_rate": 2.8761574074074073e-05, "loss": 0.0666, "step": 1250 }, { "epoch": 20.91, "learning_rate": 2.8680555555555552e-05, "loss": 0.084, "step": 1260 }, { "epoch": 21.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.0210282802581787, "eval_runtime": 0.6611, "eval_samples_per_second": 69.584, "eval_steps_per_second": 4.538, "step": 1265 }, { "epoch": 21.08, "learning_rate": 2.8599537037037035e-05, "loss": 0.068, "step": 1270 }, { "epoch": 21.24, "learning_rate": 2.8518518518518514e-05, "loss": 0.0447, "step": 1280 }, { "epoch": 21.41, "learning_rate": 2.8437499999999997e-05, "loss": 0.0568, "step": 1290 }, { "epoch": 21.58, "learning_rate": 2.835648148148148e-05, "loss": 0.0823, "step": 1300 }, { "epoch": 21.74, "learning_rate": 2.827546296296296e-05, "loss": 0.0473, "step": 1310 }, { "epoch": 21.91, "learning_rate": 2.819444444444444e-05, "loss": 0.072, "step": 1320 }, { "epoch": 21.99, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.3092247247695923, "eval_runtime": 0.6707, "eval_samples_per_second": 68.585, "eval_steps_per_second": 4.473, "step": 1325 }, { "epoch": 22.07, "learning_rate": 2.811342592592592e-05, "loss": 0.0629, "step": 1330 }, { "epoch": 22.24, "learning_rate": 2.8032407407407407e-05, "loss": 0.0908, "step": 1340 }, { "epoch": 22.41, "learning_rate": 2.795138888888889e-05, "loss": 0.0744, "step": 1350 }, { "epoch": 22.57, "learning_rate": 2.787037037037037e-05, "loss": 0.055, "step": 1360 }, { "epoch": 22.74, "learning_rate": 2.778935185185185e-05, "loss": 0.0572, "step": 1370 }, { "epoch": 22.9, "learning_rate": 2.770833333333333e-05, "loss": 0.0303, "step": 1380 }, { "epoch": 22.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.3367185592651367, "eval_runtime": 0.665, "eval_samples_per_second": 69.17, "eval_steps_per_second": 4.511, "step": 1385 }, { "epoch": 23.07, "learning_rate": 2.7627314814814813e-05, "loss": 0.0785, "step": 1390 }, { "epoch": 23.24, "learning_rate": 2.7546296296296296e-05, "loss": 0.0877, "step": 1400 }, { "epoch": 23.4, "learning_rate": 2.7465277777777775e-05, "loss": 0.0482, "step": 1410 }, { "epoch": 23.57, "learning_rate": 2.7384259259259258e-05, "loss": 0.0712, "step": 1420 }, { "epoch": 23.73, "learning_rate": 2.7303240740740737e-05, "loss": 0.0381, "step": 1430 }, { "epoch": 23.9, "learning_rate": 2.722222222222222e-05, "loss": 0.0228, "step": 1440 }, { "epoch": 24.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.0276718139648438, "eval_runtime": 0.7532, "eval_samples_per_second": 61.074, "eval_steps_per_second": 3.983, "step": 1446 }, { "epoch": 24.07, "learning_rate": 2.71412037037037e-05, "loss": 0.0696, "step": 1450 }, { "epoch": 24.23, "learning_rate": 2.706018518518518e-05, "loss": 0.0612, "step": 1460 }, { "epoch": 24.4, "learning_rate": 2.6979166666666664e-05, "loss": 0.0549, "step": 1470 }, { "epoch": 24.56, "learning_rate": 2.6898148148148143e-05, "loss": 0.0253, "step": 1480 }, { "epoch": 24.73, "learning_rate": 2.681712962962963e-05, "loss": 0.0472, "step": 1490 }, { "epoch": 24.9, "learning_rate": 2.673611111111111e-05, "loss": 0.0755, "step": 1500 }, { "epoch": 25.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.9436376094818115, "eval_runtime": 0.6656, "eval_samples_per_second": 69.111, "eval_steps_per_second": 4.507, "step": 1506 }, { "epoch": 25.06, "learning_rate": 2.665509259259259e-05, "loss": 0.0743, "step": 1510 }, { "epoch": 25.23, "learning_rate": 2.6574074074074074e-05, "loss": 0.045, "step": 1520 }, { "epoch": 25.39, "learning_rate": 2.6493055555555553e-05, "loss": 0.0669, "step": 1530 }, { "epoch": 25.56, "learning_rate": 2.6412037037037036e-05, "loss": 0.0551, "step": 1540 }, { "epoch": 25.73, "learning_rate": 2.6331018518518515e-05, "loss": 0.0256, "step": 1550 }, { "epoch": 25.89, "learning_rate": 2.6249999999999998e-05, "loss": 0.0756, "step": 1560 }, { "epoch": 25.99, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.1588078737258911, "eval_runtime": 0.6572, "eval_samples_per_second": 69.997, "eval_steps_per_second": 4.565, "step": 1566 }, { "epoch": 26.06, "learning_rate": 2.616898148148148e-05, "loss": 0.0403, "step": 1570 }, { "epoch": 26.22, "learning_rate": 2.608796296296296e-05, "loss": 0.0486, "step": 1580 }, { "epoch": 26.39, "learning_rate": 2.6006944444444442e-05, "loss": 0.0489, "step": 1590 }, { "epoch": 26.56, "learning_rate": 2.592592592592592e-05, "loss": 0.0646, "step": 1600 }, { "epoch": 26.72, "learning_rate": 2.5844907407407404e-05, "loss": 0.0713, "step": 1610 }, { "epoch": 26.89, "learning_rate": 2.5763888888888887e-05, "loss": 0.0875, "step": 1620 }, { "epoch": 26.99, "eval_accuracy": 0.717391304347826, "eval_loss": 1.3280256986618042, "eval_runtime": 0.7112, "eval_samples_per_second": 64.681, "eval_steps_per_second": 4.218, "step": 1626 }, { "epoch": 27.05, "learning_rate": 2.5682870370370366e-05, "loss": 0.0704, "step": 1630 }, { "epoch": 27.22, "learning_rate": 2.5601851851851852e-05, "loss": 0.0606, "step": 1640 }, { "epoch": 27.39, "learning_rate": 2.552083333333333e-05, "loss": 0.0385, "step": 1650 }, { "epoch": 27.55, "learning_rate": 2.5439814814814814e-05, "loss": 0.0494, "step": 1660 }, { "epoch": 27.72, "learning_rate": 2.5358796296296297e-05, "loss": 0.0171, "step": 1670 }, { "epoch": 27.88, "learning_rate": 2.5277777777777776e-05, "loss": 0.0771, "step": 1680 }, { "epoch": 28.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.8558253049850464, "eval_runtime": 0.6528, "eval_samples_per_second": 70.468, "eval_steps_per_second": 4.596, "step": 1687 }, { "epoch": 28.05, "learning_rate": 2.519675925925926e-05, "loss": 0.0447, "step": 1690 }, { "epoch": 28.22, "learning_rate": 2.5115740740740738e-05, "loss": 0.0365, "step": 1700 }, { "epoch": 28.38, "learning_rate": 2.503472222222222e-05, "loss": 0.0557, "step": 1710 }, { "epoch": 28.55, "learning_rate": 2.49537037037037e-05, "loss": 0.0447, "step": 1720 }, { "epoch": 28.71, "learning_rate": 2.4872685185185182e-05, "loss": 0.0412, "step": 1730 }, { "epoch": 28.88, "learning_rate": 2.4791666666666665e-05, "loss": 0.0467, "step": 1740 }, { "epoch": 29.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.6476134061813354, "eval_runtime": 0.6892, "eval_samples_per_second": 66.748, "eval_steps_per_second": 4.353, "step": 1747 }, { "epoch": 29.05, "learning_rate": 2.4710648148148144e-05, "loss": 0.0483, "step": 1750 }, { "epoch": 29.21, "learning_rate": 2.4629629629629627e-05, "loss": 0.1003, "step": 1760 }, { "epoch": 29.38, "learning_rate": 2.4548611111111106e-05, "loss": 0.0879, "step": 1770 }, { "epoch": 29.54, "learning_rate": 2.446759259259259e-05, "loss": 0.0333, "step": 1780 }, { "epoch": 29.71, "learning_rate": 2.4386574074074075e-05, "loss": 0.0849, "step": 1790 }, { "epoch": 29.88, "learning_rate": 2.4305555555555554e-05, "loss": 0.0382, "step": 1800 }, { "epoch": 29.99, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.9374191164970398, "eval_runtime": 0.7092, "eval_samples_per_second": 64.864, "eval_steps_per_second": 4.23, "step": 1807 }, { "epoch": 30.04, "learning_rate": 2.4224537037037037e-05, "loss": 0.0482, "step": 1810 }, { "epoch": 30.21, "learning_rate": 2.4143518518518516e-05, "loss": 0.0232, "step": 1820 }, { "epoch": 30.37, "learning_rate": 2.40625e-05, "loss": 0.0342, "step": 1830 }, { "epoch": 30.54, "learning_rate": 2.398148148148148e-05, "loss": 0.0344, "step": 1840 }, { "epoch": 30.71, "learning_rate": 2.390046296296296e-05, "loss": 0.0325, "step": 1850 }, { "epoch": 30.87, "learning_rate": 2.3819444444444443e-05, "loss": 0.0511, "step": 1860 }, { "epoch": 30.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.0847010612487793, "eval_runtime": 0.7072, "eval_samples_per_second": 65.049, "eval_steps_per_second": 4.242, "step": 1867 }, { "epoch": 31.04, "learning_rate": 2.3738425925925923e-05, "loss": 0.0243, "step": 1870 }, { "epoch": 31.2, "learning_rate": 2.3657407407407405e-05, "loss": 0.0466, "step": 1880 }, { "epoch": 31.37, "learning_rate": 2.3576388888888888e-05, "loss": 0.0543, "step": 1890 }, { "epoch": 31.54, "learning_rate": 2.3495370370370367e-05, "loss": 0.0415, "step": 1900 }, { "epoch": 31.7, "learning_rate": 2.341435185185185e-05, "loss": 0.0327, "step": 1910 }, { "epoch": 31.87, "learning_rate": 2.333333333333333e-05, "loss": 0.0161, "step": 1920 }, { "epoch": 32.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.2028273344039917, "eval_runtime": 0.6812, "eval_samples_per_second": 67.531, "eval_steps_per_second": 4.404, "step": 1928 }, { "epoch": 32.03, "learning_rate": 2.325231481481481e-05, "loss": 0.0372, "step": 1930 }, { "epoch": 32.2, "learning_rate": 2.3171296296296298e-05, "loss": 0.0433, "step": 1940 }, { "epoch": 32.37, "learning_rate": 2.3090277777777777e-05, "loss": 0.0536, "step": 1950 }, { "epoch": 32.53, "learning_rate": 2.300925925925926e-05, "loss": 0.0497, "step": 1960 }, { "epoch": 32.7, "learning_rate": 2.292824074074074e-05, "loss": 0.0317, "step": 1970 }, { "epoch": 32.86, "learning_rate": 2.284722222222222e-05, "loss": 0.0301, "step": 1980 }, { "epoch": 33.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.297067403793335, "eval_runtime": 0.6982, "eval_samples_per_second": 65.888, "eval_steps_per_second": 4.297, "step": 1988 }, { "epoch": 33.03, "learning_rate": 2.27662037037037e-05, "loss": 0.0488, "step": 1990 }, { "epoch": 33.2, "learning_rate": 2.2685185185185183e-05, "loss": 0.0382, "step": 2000 }, { "epoch": 33.36, "learning_rate": 2.2604166666666666e-05, "loss": 0.0576, "step": 2010 }, { "epoch": 33.53, "learning_rate": 2.2523148148148145e-05, "loss": 0.0111, "step": 2020 }, { "epoch": 33.69, "learning_rate": 2.2442129629629628e-05, "loss": 0.0685, "step": 2030 }, { "epoch": 33.86, "learning_rate": 2.2361111111111107e-05, "loss": 0.0443, "step": 2040 }, { "epoch": 33.99, "eval_accuracy": 0.717391304347826, "eval_loss": 1.3993148803710938, "eval_runtime": 0.6712, "eval_samples_per_second": 68.537, "eval_steps_per_second": 4.47, "step": 2048 }, { "epoch": 34.02, "learning_rate": 2.228009259259259e-05, "loss": 0.0488, "step": 2050 }, { "epoch": 34.19, "learning_rate": 2.2199074074074073e-05, "loss": 0.0322, "step": 2060 }, { "epoch": 34.36, "learning_rate": 2.2118055555555552e-05, "loss": 0.0353, "step": 2070 }, { "epoch": 34.52, "learning_rate": 2.2037037037037034e-05, "loss": 0.0387, "step": 2080 }, { "epoch": 34.69, "learning_rate": 2.1956018518518514e-05, "loss": 0.086, "step": 2090 }, { "epoch": 34.85, "learning_rate": 2.1874999999999996e-05, "loss": 0.0782, "step": 2100 }, { "epoch": 34.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.3358551263809204, "eval_runtime": 0.6692, "eval_samples_per_second": 68.742, "eval_steps_per_second": 4.483, "step": 2108 }, { "epoch": 35.02, "learning_rate": 2.1793981481481482e-05, "loss": 0.0232, "step": 2110 }, { "epoch": 35.19, "learning_rate": 2.171296296296296e-05, "loss": 0.0295, "step": 2120 }, { "epoch": 35.35, "learning_rate": 2.1631944444444444e-05, "loss": 0.0456, "step": 2130 }, { "epoch": 35.52, "learning_rate": 2.1550925925925924e-05, "loss": 0.0266, "step": 2140 }, { "epoch": 35.68, "learning_rate": 2.1469907407407406e-05, "loss": 0.0462, "step": 2150 }, { "epoch": 35.85, "learning_rate": 2.138888888888889e-05, "loss": 0.0287, "step": 2160 }, { "epoch": 36.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.3011459112167358, "eval_runtime": 0.7002, "eval_samples_per_second": 65.698, "eval_steps_per_second": 4.285, "step": 2169 }, { "epoch": 36.02, "learning_rate": 2.1307870370370368e-05, "loss": 0.0502, "step": 2170 }, { "epoch": 36.18, "learning_rate": 2.122685185185185e-05, "loss": 0.0499, "step": 2180 }, { "epoch": 36.35, "learning_rate": 2.114583333333333e-05, "loss": 0.0313, "step": 2190 }, { "epoch": 36.51, "learning_rate": 2.1064814814814813e-05, "loss": 0.0467, "step": 2200 }, { "epoch": 36.68, "learning_rate": 2.0983796296296295e-05, "loss": 0.0496, "step": 2210 }, { "epoch": 36.85, "learning_rate": 2.0902777777777775e-05, "loss": 0.0347, "step": 2220 }, { "epoch": 37.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.2450307607650757, "eval_runtime": 0.7057, "eval_samples_per_second": 65.186, "eval_steps_per_second": 4.251, "step": 2229 }, { "epoch": 37.01, "learning_rate": 2.0821759259259257e-05, "loss": 0.0322, "step": 2230 }, { "epoch": 37.18, "learning_rate": 2.0740740740740737e-05, "loss": 0.0213, "step": 2240 }, { "epoch": 37.34, "learning_rate": 2.065972222222222e-05, "loss": 0.0215, "step": 2250 }, { "epoch": 37.51, "learning_rate": 2.05787037037037e-05, "loss": 0.0172, "step": 2260 }, { "epoch": 37.68, "learning_rate": 2.0497685185185184e-05, "loss": 0.0264, "step": 2270 }, { "epoch": 37.84, "learning_rate": 2.0416666666666667e-05, "loss": 0.0538, "step": 2280 }, { "epoch": 37.99, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.8215843439102173, "eval_runtime": 0.6952, "eval_samples_per_second": 66.171, "eval_steps_per_second": 4.315, "step": 2289 }, { "epoch": 38.01, "learning_rate": 2.0335648148148146e-05, "loss": 0.0541, "step": 2290 }, { "epoch": 38.17, "learning_rate": 2.025462962962963e-05, "loss": 0.0261, "step": 2300 }, { "epoch": 38.34, "learning_rate": 2.017361111111111e-05, "loss": 0.0371, "step": 2310 }, { "epoch": 38.51, "learning_rate": 2.009259259259259e-05, "loss": 0.028, "step": 2320 }, { "epoch": 38.67, "learning_rate": 2.0011574074074074e-05, "loss": 0.0167, "step": 2330 }, { "epoch": 38.84, "learning_rate": 1.9930555555555553e-05, "loss": 0.027, "step": 2340 }, { "epoch": 38.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.170117735862732, "eval_runtime": 0.7309, "eval_samples_per_second": 62.933, "eval_steps_per_second": 4.104, "step": 2349 }, { "epoch": 39.0, "learning_rate": 1.9849537037037036e-05, "loss": 0.0375, "step": 2350 }, { "epoch": 39.17, "learning_rate": 1.9768518518518515e-05, "loss": 0.0467, "step": 2360 }, { "epoch": 39.34, "learning_rate": 1.9687499999999997e-05, "loss": 0.03, "step": 2370 }, { "epoch": 39.5, "learning_rate": 1.960648148148148e-05, "loss": 0.0243, "step": 2380 }, { "epoch": 39.67, "learning_rate": 1.952546296296296e-05, "loss": 0.0393, "step": 2390 }, { "epoch": 39.83, "learning_rate": 1.9444444444444442e-05, "loss": 0.0284, "step": 2400 }, { "epoch": 40.0, "learning_rate": 1.936342592592592e-05, "loss": 0.038, "step": 2410 }, { "epoch": 40.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.1025199890136719, "eval_runtime": 0.7243, "eval_samples_per_second": 63.514, "eval_steps_per_second": 4.142, "step": 2410 }, { "epoch": 40.17, "learning_rate": 1.9282407407407407e-05, "loss": 0.0243, "step": 2420 }, { "epoch": 40.33, "learning_rate": 1.920138888888889e-05, "loss": 0.049, "step": 2430 }, { "epoch": 40.5, "learning_rate": 1.912037037037037e-05, "loss": 0.0319, "step": 2440 }, { "epoch": 40.66, "learning_rate": 1.9039351851851852e-05, "loss": 0.0413, "step": 2450 }, { "epoch": 40.83, "learning_rate": 1.895833333333333e-05, "loss": 0.0369, "step": 2460 }, { "epoch": 41.0, "learning_rate": 1.8877314814814814e-05, "loss": 0.0244, "step": 2470 }, { "epoch": 41.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.2912131547927856, "eval_runtime": 0.6786, "eval_samples_per_second": 67.783, "eval_steps_per_second": 4.421, "step": 2470 }, { "epoch": 41.16, "learning_rate": 1.8796296296296296e-05, "loss": 0.0467, "step": 2480 }, { "epoch": 41.33, "learning_rate": 1.8715277777777776e-05, "loss": 0.0295, "step": 2490 }, { "epoch": 41.49, "learning_rate": 1.863425925925926e-05, "loss": 0.0241, "step": 2500 }, { "epoch": 41.66, "learning_rate": 1.8553240740740738e-05, "loss": 0.0238, "step": 2510 }, { "epoch": 41.83, "learning_rate": 1.847222222222222e-05, "loss": 0.0201, "step": 2520 }, { "epoch": 41.99, "learning_rate": 1.83912037037037e-05, "loss": 0.0122, "step": 2530 }, { "epoch": 41.99, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.5698707103729248, "eval_runtime": 0.7096, "eval_samples_per_second": 64.826, "eval_steps_per_second": 4.228, "step": 2530 }, { "epoch": 42.16, "learning_rate": 1.8310185185185182e-05, "loss": 0.0321, "step": 2540 }, { "epoch": 42.32, "learning_rate": 1.8229166666666665e-05, "loss": 0.0336, "step": 2550 }, { "epoch": 42.49, "learning_rate": 1.8148148148148144e-05, "loss": 0.0143, "step": 2560 }, { "epoch": 42.66, "learning_rate": 1.806712962962963e-05, "loss": 0.0205, "step": 2570 }, { "epoch": 42.82, "learning_rate": 1.798611111111111e-05, "loss": 0.0347, "step": 2580 }, { "epoch": 42.99, "learning_rate": 1.7905092592592592e-05, "loss": 0.023, "step": 2590 }, { "epoch": 42.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.51142156124115, "eval_runtime": 0.7048, "eval_samples_per_second": 65.263, "eval_steps_per_second": 4.256, "step": 2590 }, { "epoch": 43.15, "learning_rate": 1.7824074074074075e-05, "loss": 0.025, "step": 2600 }, { "epoch": 43.32, "learning_rate": 1.7743055555555554e-05, "loss": 0.0213, "step": 2610 }, { "epoch": 43.49, "learning_rate": 1.7662037037037037e-05, "loss": 0.0491, "step": 2620 }, { "epoch": 43.65, "learning_rate": 1.7581018518518516e-05, "loss": 0.0148, "step": 2630 }, { "epoch": 43.82, "learning_rate": 1.75e-05, "loss": 0.0267, "step": 2640 }, { "epoch": 43.98, "learning_rate": 1.741898148148148e-05, "loss": 0.0297, "step": 2650 }, { "epoch": 44.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 1.2188609838485718, "eval_runtime": 0.7237, "eval_samples_per_second": 63.561, "eval_steps_per_second": 4.145, "step": 2651 }, { "epoch": 44.15, "learning_rate": 1.733796296296296e-05, "loss": 0.0183, "step": 2660 }, { "epoch": 44.32, "learning_rate": 1.7256944444444443e-05, "loss": 0.0352, "step": 2670 }, { "epoch": 44.48, "learning_rate": 1.7175925925925926e-05, "loss": 0.0243, "step": 2680 }, { "epoch": 44.65, "learning_rate": 1.7094907407407405e-05, "loss": 0.104, "step": 2690 }, { "epoch": 44.81, "learning_rate": 1.7013888888888888e-05, "loss": 0.015, "step": 2700 }, { "epoch": 44.98, "learning_rate": 1.6932870370370367e-05, "loss": 0.0284, "step": 2710 }, { "epoch": 45.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.3997039794921875, "eval_runtime": 0.7121, "eval_samples_per_second": 64.595, "eval_steps_per_second": 4.213, "step": 2711 }, { "epoch": 45.15, "learning_rate": 1.685185185185185e-05, "loss": 0.0133, "step": 2720 }, { "epoch": 45.31, "learning_rate": 1.6770833333333332e-05, "loss": 0.0159, "step": 2730 }, { "epoch": 45.48, "learning_rate": 1.6689814814814815e-05, "loss": 0.0183, "step": 2740 }, { "epoch": 45.64, "learning_rate": 1.6608796296296294e-05, "loss": 0.0302, "step": 2750 }, { "epoch": 45.81, "learning_rate": 1.6527777777777777e-05, "loss": 0.0232, "step": 2760 }, { "epoch": 45.98, "learning_rate": 1.6446759259259256e-05, "loss": 0.0203, "step": 2770 }, { "epoch": 45.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.4792205095291138, "eval_runtime": 0.6983, "eval_samples_per_second": 65.879, "eval_steps_per_second": 4.296, "step": 2771 }, { "epoch": 46.14, "learning_rate": 1.636574074074074e-05, "loss": 0.037, "step": 2780 }, { "epoch": 46.31, "learning_rate": 1.628472222222222e-05, "loss": 0.0337, "step": 2790 }, { "epoch": 46.47, "learning_rate": 1.6203703703703704e-05, "loss": 0.012, "step": 2800 }, { "epoch": 46.64, "learning_rate": 1.6122685185185183e-05, "loss": 0.0568, "step": 2810 }, { "epoch": 46.8, "learning_rate": 1.6041666666666666e-05, "loss": 0.018, "step": 2820 }, { "epoch": 46.97, "learning_rate": 1.596064814814815e-05, "loss": 0.03, "step": 2830 }, { "epoch": 46.99, "eval_accuracy": 0.717391304347826, "eval_loss": 1.7487258911132812, "eval_runtime": 0.7112, "eval_samples_per_second": 64.68, "eval_steps_per_second": 4.218, "step": 2831 }, { "epoch": 47.14, "learning_rate": 1.5879629629629628e-05, "loss": 0.0083, "step": 2840 }, { "epoch": 47.3, "learning_rate": 1.579861111111111e-05, "loss": 0.0431, "step": 2850 }, { "epoch": 47.47, "learning_rate": 1.571759259259259e-05, "loss": 0.0104, "step": 2860 }, { "epoch": 47.63, "learning_rate": 1.5636574074074072e-05, "loss": 0.0322, "step": 2870 }, { "epoch": 47.8, "learning_rate": 1.5555555555555555e-05, "loss": 0.0185, "step": 2880 }, { "epoch": 47.97, "learning_rate": 1.5474537037037038e-05, "loss": 0.025, "step": 2890 }, { "epoch": 48.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.6605217456817627, "eval_runtime": 0.6827, "eval_samples_per_second": 67.383, "eval_steps_per_second": 4.395, "step": 2892 }, { "epoch": 48.13, "learning_rate": 1.5393518518518517e-05, "loss": 0.0269, "step": 2900 }, { "epoch": 48.3, "learning_rate": 1.53125e-05, "loss": 0.0274, "step": 2910 }, { "epoch": 48.46, "learning_rate": 1.523148148148148e-05, "loss": 0.033, "step": 2920 }, { "epoch": 48.63, "learning_rate": 1.5150462962962961e-05, "loss": 0.0241, "step": 2930 }, { "epoch": 48.8, "learning_rate": 1.5069444444444444e-05, "loss": 0.0445, "step": 2940 }, { "epoch": 48.96, "learning_rate": 1.4988425925925925e-05, "loss": 0.0134, "step": 2950 }, { "epoch": 49.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.410630226135254, "eval_runtime": 0.7027, "eval_samples_per_second": 65.459, "eval_steps_per_second": 4.269, "step": 2952 }, { "epoch": 49.13, "learning_rate": 1.4907407407407406e-05, "loss": 0.0366, "step": 2960 }, { "epoch": 49.29, "learning_rate": 1.4826388888888887e-05, "loss": 0.0219, "step": 2970 }, { "epoch": 49.46, "learning_rate": 1.4745370370370368e-05, "loss": 0.0092, "step": 2980 }, { "epoch": 49.63, "learning_rate": 1.4664351851851849e-05, "loss": 0.0295, "step": 2990 }, { "epoch": 49.79, "learning_rate": 1.4583333333333333e-05, "loss": 0.0183, "step": 3000 }, { "epoch": 49.96, "learning_rate": 1.4502314814814814e-05, "loss": 0.026, "step": 3010 }, { "epoch": 49.99, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.2972004413604736, "eval_runtime": 0.6842, "eval_samples_per_second": 67.236, "eval_steps_per_second": 4.385, "step": 3012 }, { "epoch": 50.12, "learning_rate": 1.4421296296296295e-05, "loss": 0.0207, "step": 3020 }, { "epoch": 50.29, "learning_rate": 1.4340277777777776e-05, "loss": 0.0111, "step": 3030 }, { "epoch": 50.46, "learning_rate": 1.4259259259259257e-05, "loss": 0.0364, "step": 3040 }, { "epoch": 50.62, "learning_rate": 1.417824074074074e-05, "loss": 0.0148, "step": 3050 }, { "epoch": 50.79, "learning_rate": 1.409722222222222e-05, "loss": 0.0271, "step": 3060 }, { "epoch": 50.95, "learning_rate": 1.4016203703703703e-05, "loss": 0.0507, "step": 3070 }, { "epoch": 50.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.3302582502365112, "eval_runtime": 0.694, "eval_samples_per_second": 66.283, "eval_steps_per_second": 4.323, "step": 3072 }, { "epoch": 51.12, "learning_rate": 1.3935185185185184e-05, "loss": 0.0164, "step": 3080 }, { "epoch": 51.29, "learning_rate": 1.3854166666666665e-05, "loss": 0.0151, "step": 3090 }, { "epoch": 51.45, "learning_rate": 1.3773148148148148e-05, "loss": 0.0495, "step": 3100 }, { "epoch": 51.62, "learning_rate": 1.3692129629629629e-05, "loss": 0.0056, "step": 3110 }, { "epoch": 51.78, "learning_rate": 1.361111111111111e-05, "loss": 0.0323, "step": 3120 }, { "epoch": 51.95, "learning_rate": 1.353009259259259e-05, "loss": 0.0394, "step": 3130 }, { "epoch": 52.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 1.1954048871994019, "eval_runtime": 0.7081, "eval_samples_per_second": 64.959, "eval_steps_per_second": 4.236, "step": 3133 }, { "epoch": 52.12, "learning_rate": 1.3449074074074072e-05, "loss": 0.0266, "step": 3140 }, { "epoch": 52.28, "learning_rate": 1.3368055555555554e-05, "loss": 0.0354, "step": 3150 }, { "epoch": 52.45, "learning_rate": 1.3287037037037037e-05, "loss": 0.0168, "step": 3160 }, { "epoch": 52.61, "learning_rate": 1.3206018518518518e-05, "loss": 0.0259, "step": 3170 }, { "epoch": 52.78, "learning_rate": 1.3124999999999999e-05, "loss": 0.027, "step": 3180 }, { "epoch": 52.95, "learning_rate": 1.304398148148148e-05, "loss": 0.0271, "step": 3190 }, { "epoch": 53.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.312525987625122, "eval_runtime": 0.6832, "eval_samples_per_second": 67.329, "eval_steps_per_second": 4.391, "step": 3193 }, { "epoch": 53.11, "learning_rate": 1.296296296296296e-05, "loss": 0.0101, "step": 3200 }, { "epoch": 53.28, "learning_rate": 1.2881944444444443e-05, "loss": 0.0212, "step": 3210 }, { "epoch": 53.44, "learning_rate": 1.2800925925925926e-05, "loss": 0.0057, "step": 3220 }, { "epoch": 53.61, "learning_rate": 1.2719907407407407e-05, "loss": 0.0132, "step": 3230 }, { "epoch": 53.78, "learning_rate": 1.2638888888888888e-05, "loss": 0.0173, "step": 3240 }, { "epoch": 53.94, "learning_rate": 1.2557870370370369e-05, "loss": 0.0115, "step": 3250 }, { "epoch": 53.99, "eval_accuracy": 0.8478260869565217, "eval_loss": 1.3443913459777832, "eval_runtime": 0.7167, "eval_samples_per_second": 64.185, "eval_steps_per_second": 4.186, "step": 3253 }, { "epoch": 54.11, "learning_rate": 1.247685185185185e-05, "loss": 0.0028, "step": 3260 }, { "epoch": 54.27, "learning_rate": 1.2395833333333333e-05, "loss": 0.0184, "step": 3270 }, { "epoch": 54.44, "learning_rate": 1.2314814814814813e-05, "loss": 0.0007, "step": 3280 }, { "epoch": 54.61, "learning_rate": 1.2233796296296294e-05, "loss": 0.0233, "step": 3290 }, { "epoch": 54.77, "learning_rate": 1.2152777777777777e-05, "loss": 0.0078, "step": 3300 }, { "epoch": 54.94, "learning_rate": 1.2071759259259258e-05, "loss": 0.0138, "step": 3310 }, { "epoch": 54.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.4688717126846313, "eval_runtime": 0.6497, "eval_samples_per_second": 70.806, "eval_steps_per_second": 4.618, "step": 3313 }, { "epoch": 55.1, "learning_rate": 1.199074074074074e-05, "loss": 0.0081, "step": 3320 }, { "epoch": 55.27, "learning_rate": 1.1909722222222222e-05, "loss": 0.0218, "step": 3330 }, { "epoch": 55.44, "learning_rate": 1.1828703703703703e-05, "loss": 0.0143, "step": 3340 }, { "epoch": 55.6, "learning_rate": 1.1747685185185184e-05, "loss": 0.0216, "step": 3350 }, { "epoch": 55.77, "learning_rate": 1.1666666666666665e-05, "loss": 0.0091, "step": 3360 }, { "epoch": 55.93, "learning_rate": 1.1585648148148149e-05, "loss": 0.0184, "step": 3370 }, { "epoch": 56.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.4959295988082886, "eval_runtime": 0.6842, "eval_samples_per_second": 67.228, "eval_steps_per_second": 4.384, "step": 3374 }, { "epoch": 56.1, "learning_rate": 1.150462962962963e-05, "loss": 0.0134, "step": 3380 }, { "epoch": 56.27, "learning_rate": 1.142361111111111e-05, "loss": 0.0193, "step": 3390 }, { "epoch": 56.43, "learning_rate": 1.1342592592592592e-05, "loss": 0.0049, "step": 3400 }, { "epoch": 56.6, "learning_rate": 1.1261574074074073e-05, "loss": 0.0105, "step": 3410 }, { "epoch": 56.76, "learning_rate": 1.1180555555555554e-05, "loss": 0.0245, "step": 3420 }, { "epoch": 56.93, "learning_rate": 1.1099537037037036e-05, "loss": 0.0163, "step": 3430 }, { "epoch": 57.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.349004864692688, "eval_runtime": 0.6951, "eval_samples_per_second": 66.174, "eval_steps_per_second": 4.316, "step": 3434 }, { "epoch": 57.1, "learning_rate": 1.1018518518518517e-05, "loss": 0.0116, "step": 3440 }, { "epoch": 57.26, "learning_rate": 1.0937499999999998e-05, "loss": 0.0352, "step": 3450 }, { "epoch": 57.43, "learning_rate": 1.085648148148148e-05, "loss": 0.0316, "step": 3460 }, { "epoch": 57.59, "learning_rate": 1.0775462962962962e-05, "loss": 0.0291, "step": 3470 }, { "epoch": 57.76, "learning_rate": 1.0694444444444444e-05, "loss": 0.0388, "step": 3480 }, { "epoch": 57.93, "learning_rate": 1.0613425925925925e-05, "loss": 0.0112, "step": 3490 }, { "epoch": 57.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.4749095439910889, "eval_runtime": 0.6781, "eval_samples_per_second": 67.833, "eval_steps_per_second": 4.424, "step": 3494 }, { "epoch": 58.09, "learning_rate": 1.0532407407407406e-05, "loss": 0.025, "step": 3500 }, { "epoch": 58.26, "learning_rate": 1.0451388888888887e-05, "loss": 0.0354, "step": 3510 }, { "epoch": 58.42, "learning_rate": 1.0370370370370368e-05, "loss": 0.0064, "step": 3520 }, { "epoch": 58.59, "learning_rate": 1.028935185185185e-05, "loss": 0.0058, "step": 3530 }, { "epoch": 58.76, "learning_rate": 1.0208333333333334e-05, "loss": 0.0241, "step": 3540 }, { "epoch": 58.92, "learning_rate": 1.0127314814814815e-05, "loss": 0.0185, "step": 3550 }, { "epoch": 58.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.5823291540145874, "eval_runtime": 0.6695, "eval_samples_per_second": 68.712, "eval_steps_per_second": 4.481, "step": 3554 }, { "epoch": 59.09, "learning_rate": 1.0046296296296295e-05, "loss": 0.0089, "step": 3560 }, { "epoch": 59.25, "learning_rate": 9.965277777777776e-06, "loss": 0.0291, "step": 3570 }, { "epoch": 59.42, "learning_rate": 9.884259259259257e-06, "loss": 0.0118, "step": 3580 }, { "epoch": 59.59, "learning_rate": 9.80324074074074e-06, "loss": 0.0318, "step": 3590 }, { "epoch": 59.75, "learning_rate": 9.722222222222221e-06, "loss": 0.0395, "step": 3600 }, { "epoch": 59.92, "learning_rate": 9.641203703703704e-06, "loss": 0.031, "step": 3610 }, { "epoch": 60.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.5190129280090332, "eval_runtime": 0.7102, "eval_samples_per_second": 64.766, "eval_steps_per_second": 4.224, "step": 3615 }, { "epoch": 60.08, "learning_rate": 9.560185185185185e-06, "loss": 0.0127, "step": 3620 }, { "epoch": 60.25, "learning_rate": 9.479166666666666e-06, "loss": 0.0065, "step": 3630 }, { "epoch": 60.41, "learning_rate": 9.398148148148148e-06, "loss": 0.0129, "step": 3640 }, { "epoch": 60.58, "learning_rate": 9.31712962962963e-06, "loss": 0.0157, "step": 3650 }, { "epoch": 60.75, "learning_rate": 9.23611111111111e-06, "loss": 0.0134, "step": 3660 }, { "epoch": 60.91, "learning_rate": 9.155092592592591e-06, "loss": 0.0161, "step": 3670 }, { "epoch": 61.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.5475845336914062, "eval_runtime": 0.7113, "eval_samples_per_second": 64.674, "eval_steps_per_second": 4.218, "step": 3675 }, { "epoch": 61.08, "learning_rate": 9.074074074074072e-06, "loss": 0.0117, "step": 3680 }, { "epoch": 61.24, "learning_rate": 8.993055555555555e-06, "loss": 0.0135, "step": 3690 }, { "epoch": 61.41, "learning_rate": 8.912037037037037e-06, "loss": 0.0179, "step": 3700 }, { "epoch": 61.58, "learning_rate": 8.831018518518518e-06, "loss": 0.0253, "step": 3710 }, { "epoch": 61.74, "learning_rate": 8.75e-06, "loss": 0.0267, "step": 3720 }, { "epoch": 61.91, "learning_rate": 8.66898148148148e-06, "loss": 0.0146, "step": 3730 }, { "epoch": 61.99, "eval_accuracy": 0.782608695652174, "eval_loss": 1.3930317163467407, "eval_runtime": 0.6749, "eval_samples_per_second": 68.162, "eval_steps_per_second": 4.445, "step": 3735 }, { "epoch": 62.07, "learning_rate": 8.587962962962963e-06, "loss": 0.0202, "step": 3740 }, { "epoch": 62.24, "learning_rate": 8.506944444444444e-06, "loss": 0.0162, "step": 3750 }, { "epoch": 62.41, "learning_rate": 8.425925925925925e-06, "loss": 0.0199, "step": 3760 }, { "epoch": 62.57, "learning_rate": 8.344907407407407e-06, "loss": 0.0135, "step": 3770 }, { "epoch": 62.74, "learning_rate": 8.263888888888888e-06, "loss": 0.0132, "step": 3780 }, { "epoch": 62.9, "learning_rate": 8.18287037037037e-06, "loss": 0.005, "step": 3790 }, { "epoch": 62.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.5454456806182861, "eval_runtime": 0.7582, "eval_samples_per_second": 60.67, "eval_steps_per_second": 3.957, "step": 3795 }, { "epoch": 63.07, "learning_rate": 8.101851851851852e-06, "loss": 0.03, "step": 3800 }, { "epoch": 63.24, "learning_rate": 8.020833333333333e-06, "loss": 0.0326, "step": 3810 }, { "epoch": 63.4, "learning_rate": 7.939814814814814e-06, "loss": 0.0128, "step": 3820 }, { "epoch": 63.57, "learning_rate": 7.858796296296295e-06, "loss": 0.0108, "step": 3830 }, { "epoch": 63.73, "learning_rate": 7.777777777777777e-06, "loss": 0.0011, "step": 3840 }, { "epoch": 63.9, "learning_rate": 7.696759259259258e-06, "loss": 0.0093, "step": 3850 }, { "epoch": 64.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.5958888530731201, "eval_runtime": 0.6768, "eval_samples_per_second": 67.965, "eval_steps_per_second": 4.433, "step": 3856 }, { "epoch": 64.07, "learning_rate": 7.61574074074074e-06, "loss": 0.0132, "step": 3860 }, { "epoch": 64.23, "learning_rate": 7.534722222222222e-06, "loss": 0.0085, "step": 3870 }, { "epoch": 64.4, "learning_rate": 7.453703703703703e-06, "loss": 0.0326, "step": 3880 }, { "epoch": 64.56, "learning_rate": 7.372685185185184e-06, "loss": 0.0295, "step": 3890 }, { "epoch": 64.73, "learning_rate": 7.291666666666667e-06, "loss": 0.0172, "step": 3900 }, { "epoch": 64.9, "learning_rate": 7.2106481481481475e-06, "loss": 0.0224, "step": 3910 }, { "epoch": 65.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.4553816318511963, "eval_runtime": 0.7364, "eval_samples_per_second": 62.467, "eval_steps_per_second": 4.074, "step": 3916 }, { "epoch": 65.06, "learning_rate": 7.1296296296296285e-06, "loss": 0.0208, "step": 3920 }, { "epoch": 65.23, "learning_rate": 7.04861111111111e-06, "loss": 0.023, "step": 3930 }, { "epoch": 65.39, "learning_rate": 6.967592592592592e-06, "loss": 0.0205, "step": 3940 }, { "epoch": 65.56, "learning_rate": 6.886574074074074e-06, "loss": 0.0088, "step": 3950 }, { "epoch": 65.73, "learning_rate": 6.805555555555555e-06, "loss": 0.0223, "step": 3960 }, { "epoch": 65.89, "learning_rate": 6.724537037037036e-06, "loss": 0.0154, "step": 3970 }, { "epoch": 65.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.5326697826385498, "eval_runtime": 0.7162, "eval_samples_per_second": 64.228, "eval_steps_per_second": 4.189, "step": 3976 }, { "epoch": 66.06, "learning_rate": 6.6435185185185185e-06, "loss": 0.0093, "step": 3980 }, { "epoch": 66.22, "learning_rate": 6.5624999999999994e-06, "loss": 0.0138, "step": 3990 }, { "epoch": 66.39, "learning_rate": 6.48148148148148e-06, "loss": 0.0059, "step": 4000 }, { "epoch": 66.56, "learning_rate": 6.400462962962963e-06, "loss": 0.0033, "step": 4010 }, { "epoch": 66.72, "learning_rate": 6.319444444444444e-06, "loss": 0.0237, "step": 4020 }, { "epoch": 66.89, "learning_rate": 6.238425925925925e-06, "loss": 0.0116, "step": 4030 }, { "epoch": 66.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6029945611953735, "eval_runtime": 0.7141, "eval_samples_per_second": 64.418, "eval_steps_per_second": 4.201, "step": 4036 }, { "epoch": 67.05, "learning_rate": 6.157407407407407e-06, "loss": 0.0064, "step": 4040 }, { "epoch": 67.22, "learning_rate": 6.0763888888888885e-06, "loss": 0.0129, "step": 4050 }, { "epoch": 67.39, "learning_rate": 5.99537037037037e-06, "loss": 0.0032, "step": 4060 }, { "epoch": 67.55, "learning_rate": 5.914351851851851e-06, "loss": 0.0101, "step": 4070 }, { "epoch": 67.72, "learning_rate": 5.833333333333332e-06, "loss": 0.0051, "step": 4080 }, { "epoch": 67.88, "learning_rate": 5.752314814814815e-06, "loss": 0.0037, "step": 4090 }, { "epoch": 68.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.5046288967132568, "eval_runtime": 0.7027, "eval_samples_per_second": 65.464, "eval_steps_per_second": 4.269, "step": 4097 }, { "epoch": 68.05, "learning_rate": 5.671296296296296e-06, "loss": 0.0151, "step": 4100 }, { "epoch": 68.22, "learning_rate": 5.590277777777777e-06, "loss": 0.0007, "step": 4110 }, { "epoch": 68.38, "learning_rate": 5.509259259259259e-06, "loss": 0.0212, "step": 4120 }, { "epoch": 68.55, "learning_rate": 5.42824074074074e-06, "loss": 0.0038, "step": 4130 }, { "epoch": 68.71, "learning_rate": 5.347222222222222e-06, "loss": 0.0211, "step": 4140 }, { "epoch": 68.88, "learning_rate": 5.266203703703703e-06, "loss": 0.0023, "step": 4150 }, { "epoch": 69.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.5222116708755493, "eval_runtime": 0.6773, "eval_samples_per_second": 67.922, "eval_steps_per_second": 4.43, "step": 4157 }, { "epoch": 69.05, "learning_rate": 5.185185185185184e-06, "loss": 0.012, "step": 4160 }, { "epoch": 69.21, "learning_rate": 5.104166666666667e-06, "loss": 0.0086, "step": 4170 }, { "epoch": 69.38, "learning_rate": 5.023148148148148e-06, "loss": 0.0203, "step": 4180 }, { "epoch": 69.54, "learning_rate": 4.942129629629629e-06, "loss": 0.0074, "step": 4190 }, { "epoch": 69.71, "learning_rate": 4.8611111111111105e-06, "loss": 0.0071, "step": 4200 }, { "epoch": 69.88, "learning_rate": 4.780092592592592e-06, "loss": 0.0068, "step": 4210 }, { "epoch": 69.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 1.4339091777801514, "eval_runtime": 0.686, "eval_samples_per_second": 67.057, "eval_steps_per_second": 4.373, "step": 4217 }, { "epoch": 70.04, "learning_rate": 4.699074074074074e-06, "loss": 0.0015, "step": 4220 }, { "epoch": 70.21, "learning_rate": 4.618055555555555e-06, "loss": 0.0096, "step": 4230 }, { "epoch": 70.37, "learning_rate": 4.537037037037036e-06, "loss": 0.0074, "step": 4240 }, { "epoch": 70.54, "learning_rate": 4.456018518518519e-06, "loss": 0.0172, "step": 4250 }, { "epoch": 70.71, "learning_rate": 4.375e-06, "loss": 0.0024, "step": 4260 }, { "epoch": 70.87, "learning_rate": 4.293981481481481e-06, "loss": 0.0342, "step": 4270 }, { "epoch": 70.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6964349746704102, "eval_runtime": 0.7042, "eval_samples_per_second": 65.326, "eval_steps_per_second": 4.26, "step": 4277 }, { "epoch": 71.04, "learning_rate": 4.212962962962962e-06, "loss": 0.0028, "step": 4280 }, { "epoch": 71.2, "learning_rate": 4.131944444444444e-06, "loss": 0.0185, "step": 4290 }, { "epoch": 71.37, "learning_rate": 4.050925925925926e-06, "loss": 0.0172, "step": 4300 }, { "epoch": 71.54, "learning_rate": 3.969907407407407e-06, "loss": 0.0051, "step": 4310 }, { "epoch": 71.7, "learning_rate": 3.888888888888889e-06, "loss": 0.0049, "step": 4320 }, { "epoch": 71.87, "learning_rate": 3.80787037037037e-06, "loss": 0.0077, "step": 4330 }, { "epoch": 72.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6102274656295776, "eval_runtime": 0.691, "eval_samples_per_second": 66.569, "eval_steps_per_second": 4.341, "step": 4338 }, { "epoch": 72.03, "learning_rate": 3.7268518518518515e-06, "loss": 0.0207, "step": 4340 }, { "epoch": 72.2, "learning_rate": 3.6458333333333333e-06, "loss": 0.0135, "step": 4350 }, { "epoch": 72.37, "learning_rate": 3.5648148148148143e-06, "loss": 0.0098, "step": 4360 }, { "epoch": 72.53, "learning_rate": 3.483796296296296e-06, "loss": 0.0207, "step": 4370 }, { "epoch": 72.7, "learning_rate": 3.4027777777777774e-06, "loss": 0.0007, "step": 4380 }, { "epoch": 72.86, "learning_rate": 3.3217592592592592e-06, "loss": 0.0043, "step": 4390 }, { "epoch": 73.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.668748140335083, "eval_runtime": 0.7135, "eval_samples_per_second": 64.472, "eval_steps_per_second": 4.205, "step": 4398 }, { "epoch": 73.03, "learning_rate": 3.24074074074074e-06, "loss": 0.0045, "step": 4400 }, { "epoch": 73.2, "learning_rate": 3.159722222222222e-06, "loss": 0.0121, "step": 4410 }, { "epoch": 73.36, "learning_rate": 3.0787037037037034e-06, "loss": 0.0131, "step": 4420 }, { "epoch": 73.53, "learning_rate": 2.997685185185185e-06, "loss": 0.0134, "step": 4430 }, { "epoch": 73.69, "learning_rate": 2.916666666666666e-06, "loss": 0.0233, "step": 4440 }, { "epoch": 73.86, "learning_rate": 2.835648148148148e-06, "loss": 0.0131, "step": 4450 }, { "epoch": 73.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6847338676452637, "eval_runtime": 0.702, "eval_samples_per_second": 65.531, "eval_steps_per_second": 4.274, "step": 4458 }, { "epoch": 74.02, "learning_rate": 2.7546296296296293e-06, "loss": 0.0151, "step": 4460 }, { "epoch": 74.19, "learning_rate": 2.673611111111111e-06, "loss": 0.0031, "step": 4470 }, { "epoch": 74.36, "learning_rate": 2.592592592592592e-06, "loss": 0.0135, "step": 4480 }, { "epoch": 74.52, "learning_rate": 2.511574074074074e-06, "loss": 0.0121, "step": 4490 }, { "epoch": 74.69, "learning_rate": 2.4305555555555552e-06, "loss": 0.0173, "step": 4500 }, { "epoch": 74.85, "learning_rate": 2.349537037037037e-06, "loss": 0.0031, "step": 4510 }, { "epoch": 74.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.7195295095443726, "eval_runtime": 0.7118, "eval_samples_per_second": 64.626, "eval_steps_per_second": 4.215, "step": 4518 }, { "epoch": 75.02, "learning_rate": 2.268518518518518e-06, "loss": 0.0069, "step": 4520 }, { "epoch": 75.19, "learning_rate": 2.1875e-06, "loss": 0.0063, "step": 4530 }, { "epoch": 75.35, "learning_rate": 2.106481481481481e-06, "loss": 0.0288, "step": 4540 }, { "epoch": 75.52, "learning_rate": 2.025462962962963e-06, "loss": 0.0162, "step": 4550 }, { "epoch": 75.68, "learning_rate": 1.9444444444444444e-06, "loss": 0.0041, "step": 4560 }, { "epoch": 75.85, "learning_rate": 1.8634259259259257e-06, "loss": 0.0087, "step": 4570 }, { "epoch": 76.0, "eval_accuracy": 0.782608695652174, "eval_loss": 1.720942735671997, "eval_runtime": 0.666, "eval_samples_per_second": 69.067, "eval_steps_per_second": 4.504, "step": 4579 }, { "epoch": 76.02, "learning_rate": 1.7824074074074071e-06, "loss": 0.0145, "step": 4580 }, { "epoch": 76.18, "learning_rate": 1.7013888888888887e-06, "loss": 0.0056, "step": 4590 }, { "epoch": 76.35, "learning_rate": 1.62037037037037e-06, "loss": 0.0108, "step": 4600 }, { "epoch": 76.51, "learning_rate": 1.5393518518518517e-06, "loss": 0.0152, "step": 4610 }, { "epoch": 76.68, "learning_rate": 1.458333333333333e-06, "loss": 0.0126, "step": 4620 }, { "epoch": 76.85, "learning_rate": 1.3773148148148147e-06, "loss": 0.0219, "step": 4630 }, { "epoch": 77.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6714533567428589, "eval_runtime": 0.7121, "eval_samples_per_second": 64.594, "eval_steps_per_second": 4.213, "step": 4639 }, { "epoch": 77.01, "learning_rate": 1.296296296296296e-06, "loss": 0.0078, "step": 4640 }, { "epoch": 77.18, "learning_rate": 1.2152777777777776e-06, "loss": 0.0186, "step": 4650 }, { "epoch": 77.34, "learning_rate": 1.134259259259259e-06, "loss": 0.0096, "step": 4660 }, { "epoch": 77.51, "learning_rate": 1.0532407407407406e-06, "loss": 0.0015, "step": 4670 }, { "epoch": 77.68, "learning_rate": 9.722222222222222e-07, "loss": 0.005, "step": 4680 }, { "epoch": 77.84, "learning_rate": 8.912037037037036e-07, "loss": 0.0229, "step": 4690 }, { "epoch": 77.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6823025941848755, "eval_runtime": 0.6989, "eval_samples_per_second": 65.82, "eval_steps_per_second": 4.293, "step": 4699 }, { "epoch": 78.01, "learning_rate": 8.10185185185185e-07, "loss": 0.0117, "step": 4700 }, { "epoch": 78.17, "learning_rate": 7.291666666666665e-07, "loss": 0.0014, "step": 4710 }, { "epoch": 78.34, "learning_rate": 6.48148148148148e-07, "loss": 0.0039, "step": 4720 }, { "epoch": 78.51, "learning_rate": 5.671296296296295e-07, "loss": 0.0111, "step": 4730 }, { "epoch": 78.67, "learning_rate": 4.861111111111111e-07, "loss": 0.0129, "step": 4740 }, { "epoch": 78.84, "learning_rate": 4.050925925925925e-07, "loss": 0.008, "step": 4750 }, { "epoch": 78.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.6750718355178833, "eval_runtime": 0.6921, "eval_samples_per_second": 66.467, "eval_steps_per_second": 4.335, "step": 4759 }, { "epoch": 79.0, "learning_rate": 3.24074074074074e-07, "loss": 0.0032, "step": 4760 }, { "epoch": 79.17, "learning_rate": 2.4305555555555555e-07, "loss": 0.0079, "step": 4770 }, { "epoch": 79.34, "learning_rate": 1.62037037037037e-07, "loss": 0.008, "step": 4780 }, { "epoch": 79.5, "learning_rate": 8.10185185185185e-08, "loss": 0.0057, "step": 4790 }, { "epoch": 79.67, "learning_rate": 0.0, "loss": 0.0051, "step": 4800 }, { "epoch": 79.67, "eval_accuracy": 0.8043478260869565, "eval_loss": 1.675818681716919, "eval_runtime": 0.6446, "eval_samples_per_second": 71.357, "eval_steps_per_second": 4.654, "step": 4800 }, { "epoch": 79.67, "step": 4800, "total_flos": 2.3777433087459287e+19, "train_loss": 0.12199072747869649, "train_runtime": 7091.3369, "train_samples_per_second": 43.467, "train_steps_per_second": 0.677 } ], "logging_steps": 10, "max_steps": 4800, "num_input_tokens_seen": 0, "num_train_epochs": 80, "save_steps": 500, "total_flos": 2.3777433087459287e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }