{ |
|
"best_metric": 0.8122261844116149, |
|
"best_model_checkpoint": "/home/user/emrecan/models/dbmdz_convbert-base-turkish-mc4-cased_allnli_tr/checkpoint-80000", |
|
"epoch": 3.0, |
|
"global_step": 88320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9773550724637682e-05, |
|
"loss": 0.7338, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.723586347427407, |
|
"eval_loss": 0.6721901297569275, |
|
"eval_runtime": 44.7079, |
|
"eval_samples_per_second": 219.536, |
|
"eval_steps_per_second": 6.867, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9547101449275363e-05, |
|
"loss": 0.603, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.7398879266428935, |
|
"eval_loss": 0.6465004682540894, |
|
"eval_runtime": 44.7809, |
|
"eval_samples_per_second": 219.178, |
|
"eval_steps_per_second": 6.856, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9320652173913047e-05, |
|
"loss": 0.5605, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7727967396841569, |
|
"eval_loss": 0.5801343321800232, |
|
"eval_runtime": 44.4418, |
|
"eval_samples_per_second": 220.851, |
|
"eval_steps_per_second": 6.908, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9094202898550727e-05, |
|
"loss": 0.55, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.7626082526744778, |
|
"eval_loss": 0.5994369983673096, |
|
"eval_runtime": 44.603, |
|
"eval_samples_per_second": 220.052, |
|
"eval_steps_per_second": 6.883, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8867753623188408e-05, |
|
"loss": 0.529, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.7697401935812532, |
|
"eval_loss": 0.5720144510269165, |
|
"eval_runtime": 44.7705, |
|
"eval_samples_per_second": 219.229, |
|
"eval_steps_per_second": 6.857, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.864130434782609e-05, |
|
"loss": 0.5196, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7768721344880285, |
|
"eval_loss": 0.5691649317741394, |
|
"eval_runtime": 44.7386, |
|
"eval_samples_per_second": 219.385, |
|
"eval_steps_per_second": 6.862, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.841485507246377e-05, |
|
"loss": 0.5117, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.7785022924095771, |
|
"eval_loss": 0.5724737644195557, |
|
"eval_runtime": 44.7709, |
|
"eval_samples_per_second": 219.227, |
|
"eval_steps_per_second": 6.857, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.818840579710145e-05, |
|
"loss": 0.5044, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.7787060621497708, |
|
"eval_loss": 0.5532070398330688, |
|
"eval_runtime": 44.6683, |
|
"eval_samples_per_second": 219.731, |
|
"eval_steps_per_second": 6.873, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7961956521739134e-05, |
|
"loss": 0.5016, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.7811512990320937, |
|
"eval_loss": 0.5546181797981262, |
|
"eval_runtime": 44.6036, |
|
"eval_samples_per_second": 220.049, |
|
"eval_steps_per_second": 6.883, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7735507246376815e-05, |
|
"loss": 0.5031, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.7869587366276108, |
|
"eval_loss": 0.5461075901985168, |
|
"eval_runtime": 44.6087, |
|
"eval_samples_per_second": 220.024, |
|
"eval_steps_per_second": 6.882, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7509057971014495e-05, |
|
"loss": 0.4949, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.7825776872134488, |
|
"eval_loss": 0.5724930763244629, |
|
"eval_runtime": 44.6365, |
|
"eval_samples_per_second": 219.888, |
|
"eval_steps_per_second": 6.878, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7282608695652176e-05, |
|
"loss": 0.4894, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.7932755985736119, |
|
"eval_loss": 0.5418519377708435, |
|
"eval_runtime": 44.7444, |
|
"eval_samples_per_second": 219.357, |
|
"eval_steps_per_second": 6.861, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7056159420289856e-05, |
|
"loss": 0.4796, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.7914416709118696, |
|
"eval_loss": 0.5278186798095703, |
|
"eval_runtime": 44.5917, |
|
"eval_samples_per_second": 220.108, |
|
"eval_steps_per_second": 6.885, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6829710144927537e-05, |
|
"loss": 0.4795, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.7953132959755477, |
|
"eval_loss": 0.5193336606025696, |
|
"eval_runtime": 44.673, |
|
"eval_samples_per_second": 219.708, |
|
"eval_steps_per_second": 6.872, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6603260869565218e-05, |
|
"loss": 0.4713, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.7770759042282221, |
|
"eval_loss": 0.5534396171569824, |
|
"eval_runtime": 44.688, |
|
"eval_samples_per_second": 219.634, |
|
"eval_steps_per_second": 6.87, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6376811594202898e-05, |
|
"loss": 0.4738, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.803871625063678, |
|
"eval_loss": 0.5097627639770508, |
|
"eval_runtime": 44.7421, |
|
"eval_samples_per_second": 219.368, |
|
"eval_steps_per_second": 6.862, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.615036231884058e-05, |
|
"loss": 0.481, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.7958227203260316, |
|
"eval_loss": 0.5244179368019104, |
|
"eval_runtime": 44.7909, |
|
"eval_samples_per_second": 219.129, |
|
"eval_steps_per_second": 6.854, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5923913043478263e-05, |
|
"loss": 0.4634, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7972491085073866, |
|
"eval_loss": 0.5214974880218506, |
|
"eval_runtime": 44.5586, |
|
"eval_samples_per_second": 220.272, |
|
"eval_steps_per_second": 6.89, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5697463768115943e-05, |
|
"loss": 0.465, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.7984717269485482, |
|
"eval_loss": 0.5128703117370605, |
|
"eval_runtime": 44.7294, |
|
"eval_samples_per_second": 219.431, |
|
"eval_steps_per_second": 6.863, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5471014492753624e-05, |
|
"loss": 0.4624, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.8046867040244524, |
|
"eval_loss": 0.5061507821083069, |
|
"eval_runtime": 44.7949, |
|
"eval_samples_per_second": 219.11, |
|
"eval_steps_per_second": 6.853, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5244565217391305e-05, |
|
"loss": 0.4597, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.8028527763627101, |
|
"eval_loss": 0.5114405751228333, |
|
"eval_runtime": 47.1654, |
|
"eval_samples_per_second": 208.098, |
|
"eval_steps_per_second": 6.509, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5018115942028985e-05, |
|
"loss": 0.4571, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.807335710646969, |
|
"eval_loss": 0.5069620609283447, |
|
"eval_runtime": 45.7859, |
|
"eval_samples_per_second": 214.368, |
|
"eval_steps_per_second": 6.705, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.479166666666667e-05, |
|
"loss": 0.4602, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.7992868059093224, |
|
"eval_loss": 0.5114511847496033, |
|
"eval_runtime": 45.8642, |
|
"eval_samples_per_second": 214.001, |
|
"eval_steps_per_second": 6.694, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.456521739130435e-05, |
|
"loss": 0.4552, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.8051961283749364, |
|
"eval_loss": 0.5085225105285645, |
|
"eval_runtime": 46.055, |
|
"eval_samples_per_second": 213.115, |
|
"eval_steps_per_second": 6.666, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.433876811594203e-05, |
|
"loss": 0.4538, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.7973509933774835, |
|
"eval_loss": 0.5118097066879272, |
|
"eval_runtime": 45.9419, |
|
"eval_samples_per_second": 213.639, |
|
"eval_steps_per_second": 6.682, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4112318840579711e-05, |
|
"loss": 0.4517, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.804381049414162, |
|
"eval_loss": 0.5036212205886841, |
|
"eval_runtime": 45.8528, |
|
"eval_samples_per_second": 214.055, |
|
"eval_steps_per_second": 6.695, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3885869565217392e-05, |
|
"loss": 0.4517, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.8062149770759043, |
|
"eval_loss": 0.4929669201374054, |
|
"eval_runtime": 45.8655, |
|
"eval_samples_per_second": 213.995, |
|
"eval_steps_per_second": 6.693, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3659420289855074e-05, |
|
"loss": 0.4413, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7964340295466124, |
|
"eval_loss": 0.5306665897369385, |
|
"eval_runtime": 45.9438, |
|
"eval_samples_per_second": 213.631, |
|
"eval_steps_per_second": 6.682, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3432971014492755e-05, |
|
"loss": 0.4483, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.7937850229240958, |
|
"eval_loss": 0.5194875001907349, |
|
"eval_runtime": 45.8704, |
|
"eval_samples_per_second": 213.972, |
|
"eval_steps_per_second": 6.693, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3206521739130435e-05, |
|
"loss": 0.4036, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.8028527763627101, |
|
"eval_loss": 0.5238322615623474, |
|
"eval_runtime": 45.9862, |
|
"eval_samples_per_second": 213.434, |
|
"eval_steps_per_second": 6.676, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.2980072463768116e-05, |
|
"loss": 0.3724, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.8081507896077432, |
|
"eval_loss": 0.5124598741531372, |
|
"eval_runtime": 47.042, |
|
"eval_samples_per_second": 208.643, |
|
"eval_steps_per_second": 6.526, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2753623188405797e-05, |
|
"loss": 0.3777, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.8075394803871625, |
|
"eval_loss": 0.509867787361145, |
|
"eval_runtime": 46.0494, |
|
"eval_samples_per_second": 213.141, |
|
"eval_steps_per_second": 6.667, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.252717391304348e-05, |
|
"loss": 0.3753, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.8052980132450331, |
|
"eval_loss": 0.5171676278114319, |
|
"eval_runtime": 45.8652, |
|
"eval_samples_per_second": 213.997, |
|
"eval_steps_per_second": 6.694, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2300724637681161e-05, |
|
"loss": 0.367, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.8052980132450331, |
|
"eval_loss": 0.5187755823135376, |
|
"eval_runtime": 45.7807, |
|
"eval_samples_per_second": 214.391, |
|
"eval_steps_per_second": 6.706, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2074275362318842e-05, |
|
"loss": 0.3819, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.8045848191543555, |
|
"eval_loss": 0.5218426585197449, |
|
"eval_runtime": 46.9682, |
|
"eval_samples_per_second": 208.971, |
|
"eval_steps_per_second": 6.536, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1847826086956522e-05, |
|
"loss": 0.363, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.7992868059093224, |
|
"eval_loss": 0.5201919078826904, |
|
"eval_runtime": 46.028, |
|
"eval_samples_per_second": 213.24, |
|
"eval_steps_per_second": 6.67, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1621376811594205e-05, |
|
"loss": 0.3794, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.8047885888945492, |
|
"eval_loss": 0.5240486860275269, |
|
"eval_runtime": 46.0171, |
|
"eval_samples_per_second": 213.29, |
|
"eval_steps_per_second": 6.671, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1394927536231885e-05, |
|
"loss": 0.3749, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.8053998981151299, |
|
"eval_loss": 0.5025837421417236, |
|
"eval_runtime": 45.9936, |
|
"eval_samples_per_second": 213.399, |
|
"eval_steps_per_second": 6.675, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1168478260869566e-05, |
|
"loss": 0.367, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.8075394803871625, |
|
"eval_loss": 0.5197691917419434, |
|
"eval_runtime": 46.0223, |
|
"eval_samples_per_second": 213.266, |
|
"eval_steps_per_second": 6.671, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0942028985507247e-05, |
|
"loss": 0.3759, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.7992868059093224, |
|
"eval_loss": 0.5298164486885071, |
|
"eval_runtime": 46.0659, |
|
"eval_samples_per_second": 213.064, |
|
"eval_steps_per_second": 6.664, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0715579710144927e-05, |
|
"loss": 0.3701, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.8090677534386144, |
|
"eval_loss": 0.5072407722473145, |
|
"eval_runtime": 47.2083, |
|
"eval_samples_per_second": 207.908, |
|
"eval_steps_per_second": 6.503, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0489130434782611e-05, |
|
"loss": 0.3742, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.8097809475292919, |
|
"eval_loss": 0.507131814956665, |
|
"eval_runtime": 46.0803, |
|
"eval_samples_per_second": 212.998, |
|
"eval_steps_per_second": 6.662, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0262681159420292e-05, |
|
"loss": 0.3706, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.8036678553234845, |
|
"eval_loss": 0.5317161083221436, |
|
"eval_runtime": 46.1098, |
|
"eval_samples_per_second": 212.861, |
|
"eval_steps_per_second": 6.658, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0036231884057972e-05, |
|
"loss": 0.3716, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.8051961283749364, |
|
"eval_loss": 0.5034372210502625, |
|
"eval_runtime": 46.0657, |
|
"eval_samples_per_second": 213.065, |
|
"eval_steps_per_second": 6.664, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.809782608695653e-06, |
|
"loss": 0.3717, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.8012226184411615, |
|
"eval_loss": 0.525775671005249, |
|
"eval_runtime": 45.917, |
|
"eval_samples_per_second": 213.755, |
|
"eval_steps_per_second": 6.686, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.583333333333335e-06, |
|
"loss": 0.3714, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.8049923586347427, |
|
"eval_loss": 0.5195213556289673, |
|
"eval_runtime": 46.0299, |
|
"eval_samples_per_second": 213.231, |
|
"eval_steps_per_second": 6.67, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.356884057971016e-06, |
|
"loss": 0.3781, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.8103922567498727, |
|
"eval_loss": 0.5003909468650818, |
|
"eval_runtime": 46.0896, |
|
"eval_samples_per_second": 212.955, |
|
"eval_steps_per_second": 6.661, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.130434782608697e-06, |
|
"loss": 0.3725, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.8113092205807437, |
|
"eval_loss": 0.5124202966690063, |
|
"eval_runtime": 46.0326, |
|
"eval_samples_per_second": 213.218, |
|
"eval_steps_per_second": 6.669, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.903985507246377e-06, |
|
"loss": 0.3624, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.8093734080489048, |
|
"eval_loss": 0.5039945840835571, |
|
"eval_runtime": 46.0265, |
|
"eval_samples_per_second": 213.247, |
|
"eval_steps_per_second": 6.67, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.677536231884058e-06, |
|
"loss": 0.3657, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.8111054508405502, |
|
"eval_loss": 0.49794963002204895, |
|
"eval_runtime": 46.0671, |
|
"eval_samples_per_second": 213.059, |
|
"eval_steps_per_second": 6.664, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.45108695652174e-06, |
|
"loss": 0.3669, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.8099847172694855, |
|
"eval_loss": 0.4967914819717407, |
|
"eval_runtime": 47.5013, |
|
"eval_samples_per_second": 206.626, |
|
"eval_steps_per_second": 6.463, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.22463768115942e-06, |
|
"loss": 0.3636, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.8079470198675497, |
|
"eval_loss": 0.5075203776359558, |
|
"eval_runtime": 45.998, |
|
"eval_samples_per_second": 213.379, |
|
"eval_steps_per_second": 6.674, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.998188405797103e-06, |
|
"loss": 0.36, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.8110035659704534, |
|
"eval_loss": 0.498517245054245, |
|
"eval_runtime": 46.2214, |
|
"eval_samples_per_second": 212.347, |
|
"eval_steps_per_second": 6.642, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.771739130434784e-06, |
|
"loss": 0.3624, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.8070300560366785, |
|
"eval_loss": 0.5125023722648621, |
|
"eval_runtime": 46.2947, |
|
"eval_samples_per_second": 212.011, |
|
"eval_steps_per_second": 6.631, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.545289855072464e-06, |
|
"loss": 0.366, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.8117167600611309, |
|
"eval_loss": 0.4918198585510254, |
|
"eval_runtime": 46.0126, |
|
"eval_samples_per_second": 213.311, |
|
"eval_steps_per_second": 6.672, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.318840579710146e-06, |
|
"loss": 0.3655, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.8109016811003565, |
|
"eval_loss": 0.505085289478302, |
|
"eval_runtime": 46.0789, |
|
"eval_samples_per_second": 213.004, |
|
"eval_steps_per_second": 6.662, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.092391304347826e-06, |
|
"loss": 0.3609, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.8104941416199695, |
|
"eval_loss": 0.5082952976226807, |
|
"eval_runtime": 46.1283, |
|
"eval_samples_per_second": 212.776, |
|
"eval_steps_per_second": 6.655, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.865942028985509e-06, |
|
"loss": 0.3672, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.8084564442180336, |
|
"eval_loss": 0.5128825902938843, |
|
"eval_runtime": 46.1412, |
|
"eval_samples_per_second": 212.717, |
|
"eval_steps_per_second": 6.653, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.639492753623189e-06, |
|
"loss": 0.3545, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8109016811003565, |
|
"eval_loss": 0.5467332601547241, |
|
"eval_runtime": 46.0619, |
|
"eval_samples_per_second": 213.083, |
|
"eval_steps_per_second": 6.665, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.41304347826087e-06, |
|
"loss": 0.2938, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.804890473764646, |
|
"eval_loss": 0.5634797811508179, |
|
"eval_runtime": 46.0511, |
|
"eval_samples_per_second": 213.133, |
|
"eval_steps_per_second": 6.667, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.186594202898551e-06, |
|
"loss": 0.29, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.8040753948038716, |
|
"eval_loss": 0.5781472325325012, |
|
"eval_runtime": 47.2218, |
|
"eval_samples_per_second": 207.849, |
|
"eval_steps_per_second": 6.501, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.960144927536232e-06, |
|
"loss": 0.2992, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.807743250127356, |
|
"eval_loss": 0.5470119714736938, |
|
"eval_runtime": 46.0932, |
|
"eval_samples_per_second": 212.938, |
|
"eval_steps_per_second": 6.66, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.733695652173914e-06, |
|
"loss": 0.2957, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.807335710646969, |
|
"eval_loss": 0.5764774084091187, |
|
"eval_runtime": 45.9878, |
|
"eval_samples_per_second": 213.426, |
|
"eval_steps_per_second": 6.676, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.292, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.8105960264900662, |
|
"eval_loss": 0.547173798084259, |
|
"eval_runtime": 45.972, |
|
"eval_samples_per_second": 213.5, |
|
"eval_steps_per_second": 6.678, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.2807971014492755e-06, |
|
"loss": 0.2893, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.8084564442180336, |
|
"eval_loss": 0.5590358376502991, |
|
"eval_runtime": 45.9212, |
|
"eval_samples_per_second": 213.736, |
|
"eval_steps_per_second": 6.685, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.054347826086957e-06, |
|
"loss": 0.2883, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.8064187468160978, |
|
"eval_loss": 0.5534654855728149, |
|
"eval_runtime": 45.9485, |
|
"eval_samples_per_second": 213.609, |
|
"eval_steps_per_second": 6.681, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.8278985507246375e-06, |
|
"loss": 0.2923, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.8094752929190016, |
|
"eval_loss": 0.5508309602737427, |
|
"eval_runtime": 45.9406, |
|
"eval_samples_per_second": 213.645, |
|
"eval_steps_per_second": 6.683, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.601449275362319e-06, |
|
"loss": 0.2868, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.8097809475292919, |
|
"eval_loss": 0.5678631067276001, |
|
"eval_runtime": 45.9005, |
|
"eval_samples_per_second": 213.832, |
|
"eval_steps_per_second": 6.688, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.2892, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.8057055527254203, |
|
"eval_loss": 0.5659688115119934, |
|
"eval_runtime": 46.0172, |
|
"eval_samples_per_second": 213.29, |
|
"eval_steps_per_second": 6.671, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.148550724637682e-06, |
|
"loss": 0.292, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.808762098828324, |
|
"eval_loss": 0.54941326379776, |
|
"eval_runtime": 46.0397, |
|
"eval_samples_per_second": 213.186, |
|
"eval_steps_per_second": 6.668, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.9221014492753625e-06, |
|
"loss": 0.286, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.8084564442180336, |
|
"eval_loss": 0.5653256177902222, |
|
"eval_runtime": 47.1102, |
|
"eval_samples_per_second": 208.341, |
|
"eval_steps_per_second": 6.517, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6956521739130436e-06, |
|
"loss": 0.2939, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.8070300560366785, |
|
"eval_loss": 0.5673069953918457, |
|
"eval_runtime": 46.0492, |
|
"eval_samples_per_second": 213.142, |
|
"eval_steps_per_second": 6.667, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4692028985507246e-06, |
|
"loss": 0.286, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.8091696383087111, |
|
"eval_loss": 0.56004798412323, |
|
"eval_runtime": 45.9369, |
|
"eval_samples_per_second": 213.663, |
|
"eval_steps_per_second": 6.683, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.242753623188406e-06, |
|
"loss": 0.2844, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.8094752929190016, |
|
"eval_loss": 0.5507674217224121, |
|
"eval_runtime": 45.8631, |
|
"eval_samples_per_second": 214.006, |
|
"eval_steps_per_second": 6.694, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.016304347826087e-06, |
|
"loss": 0.2913, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_accuracy": 0.808762098828324, |
|
"eval_loss": 0.5644803047180176, |
|
"eval_runtime": 45.9087, |
|
"eval_samples_per_second": 213.794, |
|
"eval_steps_per_second": 6.687, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.7898550724637686e-06, |
|
"loss": 0.2859, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.8094752929190016, |
|
"eval_loss": 0.5677375197410583, |
|
"eval_runtime": 46.1274, |
|
"eval_samples_per_second": 212.78, |
|
"eval_steps_per_second": 6.655, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.563405797101449e-06, |
|
"loss": 0.2892, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.8113092205807437, |
|
"eval_loss": 0.5597835183143616, |
|
"eval_runtime": 46.0994, |
|
"eval_samples_per_second": 212.909, |
|
"eval_steps_per_second": 6.66, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3369565217391307e-06, |
|
"loss": 0.2898, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.8095771777890983, |
|
"eval_loss": 0.5617640018463135, |
|
"eval_runtime": 45.9468, |
|
"eval_samples_per_second": 213.617, |
|
"eval_steps_per_second": 6.682, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.1105072463768117e-06, |
|
"loss": 0.2814, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.8102903718797758, |
|
"eval_loss": 0.5664277672767639, |
|
"eval_runtime": 46.0837, |
|
"eval_samples_per_second": 212.982, |
|
"eval_steps_per_second": 6.662, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.884057971014493e-06, |
|
"loss": 0.2917, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.8122261844116149, |
|
"eval_loss": 0.5484075546264648, |
|
"eval_runtime": 46.0805, |
|
"eval_samples_per_second": 212.997, |
|
"eval_steps_per_second": 6.662, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.657608695652174e-06, |
|
"loss": 0.2907, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.8116148751910341, |
|
"eval_loss": 0.5522238612174988, |
|
"eval_runtime": 47.1215, |
|
"eval_samples_per_second": 208.291, |
|
"eval_steps_per_second": 6.515, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4311594202898552e-06, |
|
"loss": 0.2896, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.8092715231788079, |
|
"eval_loss": 0.5539634823799133, |
|
"eval_runtime": 46.0412, |
|
"eval_samples_per_second": 213.179, |
|
"eval_steps_per_second": 6.668, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.2047101449275363e-06, |
|
"loss": 0.2907, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.8103922567498727, |
|
"eval_loss": 0.5468757152557373, |
|
"eval_runtime": 46.1268, |
|
"eval_samples_per_second": 212.783, |
|
"eval_steps_per_second": 6.656, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.782608695652175e-07, |
|
"loss": 0.2882, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.8122261844116149, |
|
"eval_loss": 0.5470659732818604, |
|
"eval_runtime": 46.0408, |
|
"eval_samples_per_second": 213.18, |
|
"eval_steps_per_second": 6.668, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.518115942028987e-07, |
|
"loss": 0.2878, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.8107997962302598, |
|
"eval_loss": 0.5532234907150269, |
|
"eval_runtime": 46.1642, |
|
"eval_samples_per_second": 212.61, |
|
"eval_steps_per_second": 6.65, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.253623188405797e-07, |
|
"loss": 0.2858, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_accuracy": 0.8115129903209374, |
|
"eval_loss": 0.5510849952697754, |
|
"eval_runtime": 46.0337, |
|
"eval_samples_per_second": 213.214, |
|
"eval_steps_per_second": 6.669, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.989130434782609e-07, |
|
"loss": 0.288, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.8111054508405502, |
|
"eval_loss": 0.5491093397140503, |
|
"eval_runtime": 45.9486, |
|
"eval_samples_per_second": 213.608, |
|
"eval_steps_per_second": 6.681, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.246376811594204e-08, |
|
"loss": 0.2834, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.8111054508405502, |
|
"eval_loss": 0.5540585517883301, |
|
"eval_runtime": 45.8903, |
|
"eval_samples_per_second": 213.879, |
|
"eval_steps_per_second": 6.69, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 88320, |
|
"total_flos": 9.711893491230248e+16, |
|
"train_loss": 0.3844692539477694, |
|
"train_runtime": 45994.9721, |
|
"train_samples_per_second": 61.446, |
|
"train_steps_per_second": 1.92 |
|
} |
|
], |
|
"max_steps": 88320, |
|
"num_train_epochs": 3, |
|
"total_flos": 9.711893491230248e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |