|
{ |
|
"best_metric": 0.7371370351502802, |
|
"best_model_checkpoint": "/home/user/emrecan/models/dbmdz_distilbert-base-turkish-cased_allnli_tr/checkpoint-80000", |
|
"epoch": 3.0, |
|
"global_step": 88320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9773550724637682e-05, |
|
"loss": 0.94, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.5812531839021905, |
|
"eval_loss": 0.9074209928512573, |
|
"eval_runtime": 19.0819, |
|
"eval_samples_per_second": 514.361, |
|
"eval_steps_per_second": 16.089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9547101449275363e-05, |
|
"loss": 0.8102, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.5949057564951604, |
|
"eval_loss": 0.8801841139793396, |
|
"eval_runtime": 18.8995, |
|
"eval_samples_per_second": 519.327, |
|
"eval_steps_per_second": 16.244, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9320652173913047e-05, |
|
"loss": 0.7737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6154865002547122, |
|
"eval_loss": 0.8491194248199463, |
|
"eval_runtime": 18.862, |
|
"eval_samples_per_second": 520.36, |
|
"eval_steps_per_second": 16.276, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9094202898550727e-05, |
|
"loss": 0.7576, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6260825267447784, |
|
"eval_loss": 0.8283059000968933, |
|
"eval_runtime": 19.0535, |
|
"eval_samples_per_second": 515.129, |
|
"eval_steps_per_second": 16.113, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8867753623188408e-05, |
|
"loss": 0.7286, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6361691288843607, |
|
"eval_loss": 0.8149797916412354, |
|
"eval_runtime": 18.9255, |
|
"eval_samples_per_second": 518.613, |
|
"eval_steps_per_second": 16.222, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.864130434782609e-05, |
|
"loss": 0.7162, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6400407539480387, |
|
"eval_loss": 0.7998170852661133, |
|
"eval_runtime": 18.8499, |
|
"eval_samples_per_second": 520.693, |
|
"eval_steps_per_second": 16.287, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.841485507246377e-05, |
|
"loss": 0.7092, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.6565461029037188, |
|
"eval_loss": 0.7830348014831543, |
|
"eval_runtime": 19.051, |
|
"eval_samples_per_second": 515.195, |
|
"eval_steps_per_second": 16.115, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.818840579710145e-05, |
|
"loss": 0.6962, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.6628629648497198, |
|
"eval_loss": 0.7653073072433472, |
|
"eval_runtime": 19.1088, |
|
"eval_samples_per_second": 513.638, |
|
"eval_steps_per_second": 16.066, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7961956521739134e-05, |
|
"loss": 0.6876, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.6686704024452369, |
|
"eval_loss": 0.7630106210708618, |
|
"eval_runtime": 19.1562, |
|
"eval_samples_per_second": 512.367, |
|
"eval_steps_per_second": 16.026, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7735507246376815e-05, |
|
"loss": 0.6778, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.6738665308201732, |
|
"eval_loss": 0.7475225329399109, |
|
"eval_runtime": 18.8719, |
|
"eval_samples_per_second": 520.085, |
|
"eval_steps_per_second": 16.268, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7509057971014495e-05, |
|
"loss": 0.6737, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.6781456953642384, |
|
"eval_loss": 0.7495400309562683, |
|
"eval_runtime": 24.231, |
|
"eval_samples_per_second": 405.059, |
|
"eval_steps_per_second": 12.67, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7282608695652176e-05, |
|
"loss": 0.6712, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.6826286296484972, |
|
"eval_loss": 0.7350295782089233, |
|
"eval_runtime": 25.183, |
|
"eval_samples_per_second": 389.747, |
|
"eval_steps_per_second": 12.191, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7056159420289856e-05, |
|
"loss": 0.6559, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.6896586856851757, |
|
"eval_loss": 0.7274259328842163, |
|
"eval_runtime": 20.5021, |
|
"eval_samples_per_second": 478.731, |
|
"eval_steps_per_second": 14.974, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6829710144927537e-05, |
|
"loss": 0.6493, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.6901681100356597, |
|
"eval_loss": 0.7247650623321533, |
|
"eval_runtime": 19.0556, |
|
"eval_samples_per_second": 515.071, |
|
"eval_steps_per_second": 16.111, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6603260869565218e-05, |
|
"loss": 0.6483, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.6857870606214977, |
|
"eval_loss": 0.726345419883728, |
|
"eval_runtime": 19.1114, |
|
"eval_samples_per_second": 513.568, |
|
"eval_steps_per_second": 16.064, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6376811594202898e-05, |
|
"loss": 0.6445, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.697809475292919, |
|
"eval_loss": 0.7070016860961914, |
|
"eval_runtime": 19.0198, |
|
"eval_samples_per_second": 516.041, |
|
"eval_steps_per_second": 16.141, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.615036231884058e-05, |
|
"loss": 0.6467, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.6981151299032093, |
|
"eval_loss": 0.7083250284194946, |
|
"eval_runtime": 19.0519, |
|
"eval_samples_per_second": 515.172, |
|
"eval_steps_per_second": 16.114, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5923913043478263e-05, |
|
"loss": 0.6332, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7003565970453388, |
|
"eval_loss": 0.6996482014656067, |
|
"eval_runtime": 18.8872, |
|
"eval_samples_per_second": 519.665, |
|
"eval_steps_per_second": 16.254, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5697463768115943e-05, |
|
"loss": 0.6288, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.697809475292919, |
|
"eval_loss": 0.6978936195373535, |
|
"eval_runtime": 19.1365, |
|
"eval_samples_per_second": 512.895, |
|
"eval_steps_per_second": 16.043, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5471014492753624e-05, |
|
"loss": 0.6308, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7040244523688233, |
|
"eval_loss": 0.6911584734916687, |
|
"eval_runtime": 19.2976, |
|
"eval_samples_per_second": 508.612, |
|
"eval_steps_per_second": 15.909, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5244565217391305e-05, |
|
"loss": 0.622, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.7092205807437596, |
|
"eval_loss": 0.6903654932975769, |
|
"eval_runtime": 20.766, |
|
"eval_samples_per_second": 472.648, |
|
"eval_steps_per_second": 14.784, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5018115942028985e-05, |
|
"loss": 0.615, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.7094243504839531, |
|
"eval_loss": 0.6871742010116577, |
|
"eval_runtime": 20.0444, |
|
"eval_samples_per_second": 489.662, |
|
"eval_steps_per_second": 15.316, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.479166666666667e-05, |
|
"loss": 0.6186, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.7074885379521141, |
|
"eval_loss": 0.6876837611198425, |
|
"eval_runtime": 18.8534, |
|
"eval_samples_per_second": 520.594, |
|
"eval_steps_per_second": 16.283, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.456521739130435e-05, |
|
"loss": 0.6183, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.7110545084055018, |
|
"eval_loss": 0.6818488836288452, |
|
"eval_runtime": 18.9963, |
|
"eval_samples_per_second": 516.679, |
|
"eval_steps_per_second": 16.161, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.433876811594203e-05, |
|
"loss": 0.6115, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.7121752419765665, |
|
"eval_loss": 0.6855539083480835, |
|
"eval_runtime": 18.8559, |
|
"eval_samples_per_second": 520.528, |
|
"eval_steps_per_second": 16.281, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4112318840579711e-05, |
|
"loss": 0.608, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.7178807947019867, |
|
"eval_loss": 0.6696537137031555, |
|
"eval_runtime": 18.8735, |
|
"eval_samples_per_second": 520.042, |
|
"eval_steps_per_second": 16.266, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3885869565217392e-05, |
|
"loss": 0.6071, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.7180845644421804, |
|
"eval_loss": 0.6726976633071899, |
|
"eval_runtime": 18.9354, |
|
"eval_samples_per_second": 518.342, |
|
"eval_steps_per_second": 16.213, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3659420289855074e-05, |
|
"loss": 0.601, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7117677024961793, |
|
"eval_loss": 0.6798397898674011, |
|
"eval_runtime": 18.8349, |
|
"eval_samples_per_second": 521.108, |
|
"eval_steps_per_second": 16.3, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3432971014492755e-05, |
|
"loss": 0.6018, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.7070809984717269, |
|
"eval_loss": 0.685390293598175, |
|
"eval_runtime": 19.049, |
|
"eval_samples_per_second": 515.251, |
|
"eval_steps_per_second": 16.116, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3206521739130435e-05, |
|
"loss": 0.5762, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.7214467651553744, |
|
"eval_loss": 0.6697466373443604, |
|
"eval_runtime": 18.9286, |
|
"eval_samples_per_second": 518.528, |
|
"eval_steps_per_second": 16.219, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.2980072463768116e-05, |
|
"loss": 0.5507, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.7184921039225675, |
|
"eval_loss": 0.6709696054458618, |
|
"eval_runtime": 20.3622, |
|
"eval_samples_per_second": 482.02, |
|
"eval_steps_per_second": 15.077, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2753623188405797e-05, |
|
"loss": 0.5575, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.7225674987264391, |
|
"eval_loss": 0.6709049344062805, |
|
"eval_runtime": 20.0855, |
|
"eval_samples_per_second": 488.662, |
|
"eval_steps_per_second": 15.285, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.252717391304348e-05, |
|
"loss": 0.5493, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.7191034131431483, |
|
"eval_loss": 0.6659159064292908, |
|
"eval_runtime": 20.41, |
|
"eval_samples_per_second": 480.892, |
|
"eval_steps_per_second": 15.042, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2300724637681161e-05, |
|
"loss": 0.5464, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.7231788079470198, |
|
"eval_loss": 0.6709375381469727, |
|
"eval_runtime": 18.9369, |
|
"eval_samples_per_second": 518.301, |
|
"eval_steps_per_second": 16.212, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2074275362318842e-05, |
|
"loss": 0.5595, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.7219561895058584, |
|
"eval_loss": 0.6641885638237, |
|
"eval_runtime": 18.8852, |
|
"eval_samples_per_second": 519.718, |
|
"eval_steps_per_second": 16.256, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1847826086956522e-05, |
|
"loss": 0.5446, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.720224146714213, |
|
"eval_loss": 0.6708667874336243, |
|
"eval_runtime": 18.8795, |
|
"eval_samples_per_second": 519.875, |
|
"eval_steps_per_second": 16.261, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1621376811594205e-05, |
|
"loss": 0.5524, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.714824248599083, |
|
"eval_loss": 0.6751348972320557, |
|
"eval_runtime": 18.8954, |
|
"eval_samples_per_second": 519.438, |
|
"eval_steps_per_second": 16.247, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1394927536231885e-05, |
|
"loss": 0.5473, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7209373408048905, |
|
"eval_loss": 0.6641535758972168, |
|
"eval_runtime": 18.8349, |
|
"eval_samples_per_second": 521.107, |
|
"eval_steps_per_second": 16.3, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1168478260869566e-05, |
|
"loss": 0.5477, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.7222618441161488, |
|
"eval_loss": 0.6661530137062073, |
|
"eval_runtime": 18.9172, |
|
"eval_samples_per_second": 518.841, |
|
"eval_steps_per_second": 16.229, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0942028985507247e-05, |
|
"loss": 0.5522, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.7226693835965359, |
|
"eval_loss": 0.6586304306983948, |
|
"eval_runtime": 19.03, |
|
"eval_samples_per_second": 515.763, |
|
"eval_steps_per_second": 16.132, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0715579710144927e-05, |
|
"loss": 0.5406, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.7258278145695364, |
|
"eval_loss": 0.6601831912994385, |
|
"eval_runtime": 20.4522, |
|
"eval_samples_per_second": 479.9, |
|
"eval_steps_per_second": 15.011, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0489130434782611e-05, |
|
"loss": 0.54, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.7272542027508915, |
|
"eval_loss": 0.6564027070999146, |
|
"eval_runtime": 20.4433, |
|
"eval_samples_per_second": 480.108, |
|
"eval_steps_per_second": 15.017, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0262681159420292e-05, |
|
"loss": 0.5458, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.7213448802852777, |
|
"eval_loss": 0.6779993176460266, |
|
"eval_runtime": 20.545, |
|
"eval_samples_per_second": 477.731, |
|
"eval_steps_per_second": 14.943, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0036231884057972e-05, |
|
"loss": 0.5448, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.7234844625573102, |
|
"eval_loss": 0.6560543775558472, |
|
"eval_runtime": 18.9249, |
|
"eval_samples_per_second": 518.629, |
|
"eval_steps_per_second": 16.222, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.809782608695653e-06, |
|
"loss": 0.5418, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.7253183902190524, |
|
"eval_loss": 0.6599519848823547, |
|
"eval_runtime": 19.0375, |
|
"eval_samples_per_second": 515.561, |
|
"eval_steps_per_second": 16.126, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.583333333333335e-06, |
|
"loss": 0.5408, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.7273560876209882, |
|
"eval_loss": 0.6616316437721252, |
|
"eval_runtime": 19.3854, |
|
"eval_samples_per_second": 506.308, |
|
"eval_steps_per_second": 15.837, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.356884057971016e-06, |
|
"loss": 0.5451, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.7282730514518594, |
|
"eval_loss": 0.655745267868042, |
|
"eval_runtime": 18.8581, |
|
"eval_samples_per_second": 520.467, |
|
"eval_steps_per_second": 16.28, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.130434782608697e-06, |
|
"loss": 0.5385, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.7294956698930208, |
|
"eval_loss": 0.6583314538002014, |
|
"eval_runtime": 19.1341, |
|
"eval_samples_per_second": 512.958, |
|
"eval_steps_per_second": 16.045, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.903985507246377e-06, |
|
"loss": 0.5261, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.7324503311258278, |
|
"eval_loss": 0.6467986106872559, |
|
"eval_runtime": 19.0986, |
|
"eval_samples_per_second": 513.912, |
|
"eval_steps_per_second": 16.074, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.677536231884058e-06, |
|
"loss": 0.5364, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.732857870606215, |
|
"eval_loss": 0.644736111164093, |
|
"eval_runtime": 18.8951, |
|
"eval_samples_per_second": 519.447, |
|
"eval_steps_per_second": 16.248, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.45108695652174e-06, |
|
"loss": 0.5294, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.7320427916454406, |
|
"eval_loss": 0.6429356932640076, |
|
"eval_runtime": 20.711, |
|
"eval_samples_per_second": 473.903, |
|
"eval_steps_per_second": 14.823, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.22463768115942e-06, |
|
"loss": 0.5332, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.7271523178807947, |
|
"eval_loss": 0.6508257985115051, |
|
"eval_runtime": 20.8391, |
|
"eval_samples_per_second": 470.989, |
|
"eval_steps_per_second": 14.732, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.998188405797103e-06, |
|
"loss": 0.5274, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.7325522159959246, |
|
"eval_loss": 0.6491544842720032, |
|
"eval_runtime": 19.9398, |
|
"eval_samples_per_second": 492.231, |
|
"eval_steps_per_second": 15.396, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.771739130434784e-06, |
|
"loss": 0.5286, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.7318390219052471, |
|
"eval_loss": 0.6470006704330444, |
|
"eval_runtime": 18.918, |
|
"eval_samples_per_second": 518.818, |
|
"eval_steps_per_second": 16.228, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.545289855072464e-06, |
|
"loss": 0.5359, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.7354049923586348, |
|
"eval_loss": 0.6392927169799805, |
|
"eval_runtime": 19.0427, |
|
"eval_samples_per_second": 515.422, |
|
"eval_steps_per_second": 16.122, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.318840579710146e-06, |
|
"loss": 0.5366, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.736729495669893, |
|
"eval_loss": 0.6445118188858032, |
|
"eval_runtime": 19.0599, |
|
"eval_samples_per_second": 514.955, |
|
"eval_steps_per_second": 16.107, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.092391304347826e-06, |
|
"loss": 0.5296, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.7313295975547631, |
|
"eval_loss": 0.6413289904594421, |
|
"eval_runtime": 18.9234, |
|
"eval_samples_per_second": 518.671, |
|
"eval_steps_per_second": 16.223, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.865942028985509e-06, |
|
"loss": 0.5346, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.7315333672949567, |
|
"eval_loss": 0.6392691135406494, |
|
"eval_runtime": 18.8526, |
|
"eval_samples_per_second": 520.617, |
|
"eval_steps_per_second": 16.284, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.639492753623189e-06, |
|
"loss": 0.5264, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7357106469689251, |
|
"eval_loss": 0.6448105573654175, |
|
"eval_runtime": 18.9027, |
|
"eval_samples_per_second": 519.238, |
|
"eval_steps_per_second": 16.241, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.41304347826087e-06, |
|
"loss": 0.4857, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.7334691798267957, |
|
"eval_loss": 0.6640365123748779, |
|
"eval_runtime": 19.0337, |
|
"eval_samples_per_second": 515.664, |
|
"eval_steps_per_second": 16.129, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.186594202898551e-06, |
|
"loss": 0.4888, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.7318390219052471, |
|
"eval_loss": 0.6611655950546265, |
|
"eval_runtime": 20.389, |
|
"eval_samples_per_second": 481.387, |
|
"eval_steps_per_second": 15.057, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.960144927536232e-06, |
|
"loss": 0.4964, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.7336729495669893, |
|
"eval_loss": 0.6516362428665161, |
|
"eval_runtime": 20.5019, |
|
"eval_samples_per_second": 478.737, |
|
"eval_steps_per_second": 14.974, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.733695652173914e-06, |
|
"loss": 0.493, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7356087620988283, |
|
"eval_loss": 0.6503166556358337, |
|
"eval_runtime": 20.2046, |
|
"eval_samples_per_second": 485.78, |
|
"eval_steps_per_second": 15.195, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.4961, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.734793683138054, |
|
"eval_loss": 0.6519193053245544, |
|
"eval_runtime": 18.9078, |
|
"eval_samples_per_second": 519.097, |
|
"eval_steps_per_second": 16.237, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.2807971014492755e-06, |
|
"loss": 0.4847, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.7326541008660213, |
|
"eval_loss": 0.6516821980476379, |
|
"eval_runtime": 19.1631, |
|
"eval_samples_per_second": 512.183, |
|
"eval_steps_per_second": 16.02, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.054347826086957e-06, |
|
"loss": 0.483, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.7310239429444727, |
|
"eval_loss": 0.655524492263794, |
|
"eval_runtime": 19.0256, |
|
"eval_samples_per_second": 515.884, |
|
"eval_steps_per_second": 16.136, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.8278985507246375e-06, |
|
"loss": 0.4857, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.7312277126846664, |
|
"eval_loss": 0.6524935364723206, |
|
"eval_runtime": 19.0792, |
|
"eval_samples_per_second": 514.435, |
|
"eval_steps_per_second": 16.091, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.601449275362319e-06, |
|
"loss": 0.484, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.7341823739174732, |
|
"eval_loss": 0.6443938612937927, |
|
"eval_runtime": 19.0647, |
|
"eval_samples_per_second": 514.826, |
|
"eval_steps_per_second": 16.103, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.4792, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.7329597554763118, |
|
"eval_loss": 0.6508365273475647, |
|
"eval_runtime": 19.1608, |
|
"eval_samples_per_second": 512.243, |
|
"eval_steps_per_second": 16.022, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.148550724637682e-06, |
|
"loss": 0.488, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.7343861436576669, |
|
"eval_loss": 0.6512596607208252, |
|
"eval_runtime": 20.0389, |
|
"eval_samples_per_second": 489.797, |
|
"eval_steps_per_second": 15.32, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.9221014492753625e-06, |
|
"loss": 0.472, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.7345899133978604, |
|
"eval_loss": 0.6546801328659058, |
|
"eval_runtime": 21.0064, |
|
"eval_samples_per_second": 467.238, |
|
"eval_steps_per_second": 14.615, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6956521739130436e-06, |
|
"loss": 0.4872, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.7341823739174732, |
|
"eval_loss": 0.650049090385437, |
|
"eval_runtime": 20.3969, |
|
"eval_samples_per_second": 481.201, |
|
"eval_steps_per_second": 15.051, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4692028985507246e-06, |
|
"loss": 0.4782, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.735812531839022, |
|
"eval_loss": 0.6584815979003906, |
|
"eval_runtime": 20.5297, |
|
"eval_samples_per_second": 478.088, |
|
"eval_steps_per_second": 14.954, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.242753623188406e-06, |
|
"loss": 0.481, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.7356087620988283, |
|
"eval_loss": 0.647666335105896, |
|
"eval_runtime": 18.9629, |
|
"eval_samples_per_second": 517.589, |
|
"eval_steps_per_second": 16.189, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.016304347826087e-06, |
|
"loss": 0.4822, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_accuracy": 0.7345899133978604, |
|
"eval_loss": 0.6586557626724243, |
|
"eval_runtime": 19.0494, |
|
"eval_samples_per_second": 515.239, |
|
"eval_steps_per_second": 16.116, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.7898550724637686e-06, |
|
"loss": 0.4728, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.7339786041772797, |
|
"eval_loss": 0.6572148203849792, |
|
"eval_runtime": 18.9091, |
|
"eval_samples_per_second": 519.063, |
|
"eval_steps_per_second": 16.236, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.563405797101449e-06, |
|
"loss": 0.4841, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.7374426897605706, |
|
"eval_loss": 0.6442714929580688, |
|
"eval_runtime": 19.0799, |
|
"eval_samples_per_second": 514.415, |
|
"eval_steps_per_second": 16.09, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3369565217391307e-06, |
|
"loss": 0.4885, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.736220071319409, |
|
"eval_loss": 0.6493647694587708, |
|
"eval_runtime": 18.9053, |
|
"eval_samples_per_second": 519.165, |
|
"eval_steps_per_second": 16.239, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.1105072463768117e-06, |
|
"loss": 0.4752, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.7381558838512481, |
|
"eval_loss": 0.6508816480636597, |
|
"eval_runtime": 19.0533, |
|
"eval_samples_per_second": 515.135, |
|
"eval_steps_per_second": 16.113, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.884057971014493e-06, |
|
"loss": 0.4883, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.7371370351502802, |
|
"eval_loss": 0.6456698179244995, |
|
"eval_runtime": 18.855, |
|
"eval_samples_per_second": 520.551, |
|
"eval_steps_per_second": 16.282, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.657608695652174e-06, |
|
"loss": 0.4888, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.7364238410596027, |
|
"eval_loss": 0.6496989130973816, |
|
"eval_runtime": 20.8847, |
|
"eval_samples_per_second": 469.962, |
|
"eval_steps_per_second": 14.7, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4311594202898552e-06, |
|
"loss": 0.4844, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.7376464595007641, |
|
"eval_loss": 0.6481407880783081, |
|
"eval_runtime": 20.1004, |
|
"eval_samples_per_second": 488.299, |
|
"eval_steps_per_second": 15.273, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.2047101449275363e-06, |
|
"loss": 0.4833, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.7388690779419256, |
|
"eval_loss": 0.6450786590576172, |
|
"eval_runtime": 20.1432, |
|
"eval_samples_per_second": 487.26, |
|
"eval_steps_per_second": 15.241, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.782608695652175e-07, |
|
"loss": 0.48, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.7373408048904737, |
|
"eval_loss": 0.6423162221908569, |
|
"eval_runtime": 18.9117, |
|
"eval_samples_per_second": 518.992, |
|
"eval_steps_per_second": 16.233, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.518115942028987e-07, |
|
"loss": 0.4832, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.7357106469689251, |
|
"eval_loss": 0.6477082371711731, |
|
"eval_runtime": 18.9455, |
|
"eval_samples_per_second": 518.066, |
|
"eval_steps_per_second": 16.204, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.253623188405797e-07, |
|
"loss": 0.4805, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_accuracy": 0.7378502292409577, |
|
"eval_loss": 0.6464207768440247, |
|
"eval_runtime": 18.9094, |
|
"eval_samples_per_second": 519.053, |
|
"eval_steps_per_second": 16.235, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.989130434782609e-07, |
|
"loss": 0.4775, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7379521141110545, |
|
"eval_loss": 0.6477315425872803, |
|
"eval_runtime": 19.0715, |
|
"eval_samples_per_second": 514.642, |
|
"eval_steps_per_second": 16.097, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.246376811594204e-08, |
|
"loss": 0.4843, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7380539989811513, |
|
"eval_loss": 0.6481098532676697, |
|
"eval_runtime": 18.8988, |
|
"eval_samples_per_second": 519.345, |
|
"eval_steps_per_second": 16.244, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 88320, |
|
"total_flos": 5.0801111188255944e+16, |
|
"train_loss": 0.565458259202432, |
|
"train_runtime": 14832.9522, |
|
"train_samples_per_second": 190.536, |
|
"train_steps_per_second": 5.954 |
|
} |
|
], |
|
"max_steps": 88320, |
|
"num_train_epochs": 3, |
|
"total_flos": 5.0801111188255944e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|