{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9993284083277367,
  "global_step": 744,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 9.865591397849462e-05,
      "loss": 1.8106,
      "step": 10
    },
    {
      "epoch": 0.01,
      "eval_accuracy": 0.19744795560836792,
      "eval_loss": 1.7615934610366821,
      "eval_runtime": 134.0063,
      "eval_samples_per_second": 11.111,
      "eval_steps_per_second": 2.783,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.731182795698925e-05,
      "loss": 1.7268,
      "step": 20
    },
    {
      "epoch": 0.03,
      "eval_accuracy": 0.2525184750556946,
      "eval_loss": 1.7187447547912598,
      "eval_runtime": 145.4077,
      "eval_samples_per_second": 10.24,
      "eval_steps_per_second": 2.565,
      "step": 20
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.596774193548387e-05,
      "loss": 1.7269,
      "step": 30
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.309603750705719,
      "eval_loss": 1.6442092657089233,
      "eval_runtime": 144.9066,
      "eval_samples_per_second": 10.276,
      "eval_steps_per_second": 2.574,
      "step": 30
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.46236559139785e-05,
      "loss": 1.7086,
      "step": 40
    },
    {
      "epoch": 0.05,
      "eval_accuracy": 0.33378106355667114,
      "eval_loss": 1.583362340927124,
      "eval_runtime": 135.7155,
      "eval_samples_per_second": 10.971,
      "eval_steps_per_second": 2.748,
      "step": 40
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.327956989247312e-05,
      "loss": 1.6983,
      "step": 50
    },
    {
      "epoch": 0.07,
      "eval_accuracy": 0.35997313261032104,
      "eval_loss": 1.6194798946380615,
      "eval_runtime": 135.5719,
      "eval_samples_per_second": 10.983,
      "eval_steps_per_second": 2.751,
      "step": 50
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.193548387096774e-05,
      "loss": 1.5845,
      "step": 60
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.34184014797210693,
      "eval_loss": 1.575337290763855,
      "eval_runtime": 135.8178,
      "eval_samples_per_second": 10.963,
      "eval_steps_per_second": 2.746,
      "step": 60
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.059139784946237e-05,
      "loss": 1.5744,
      "step": 70
    },
    {
      "epoch": 0.09,
      "eval_accuracy": 0.3707185983657837,
      "eval_loss": 1.5669182538986206,
      "eval_runtime": 135.433,
      "eval_samples_per_second": 10.994,
      "eval_steps_per_second": 2.754,
      "step": 70
    },
    {
      "epoch": 0.11,
      "learning_rate": 8.924731182795699e-05,
      "loss": 1.5915,
      "step": 80
    },
    {
      "epoch": 0.11,
      "eval_accuracy": 0.3754197359085083,
      "eval_loss": 1.5411657094955444,
      "eval_runtime": 135.3362,
      "eval_samples_per_second": 11.002,
      "eval_steps_per_second": 2.756,
      "step": 80
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.790322580645162e-05,
      "loss": 1.5105,
      "step": 90
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.2612491548061371,
      "eval_loss": 2.0037343502044678,
      "eval_runtime": 135.2385,
      "eval_samples_per_second": 11.01,
      "eval_steps_per_second": 2.758,
      "step": 90
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.655913978494624e-05,
      "loss": 1.4689,
      "step": 100
    },
    {
      "epoch": 0.13,
      "eval_accuracy": 0.3626595139503479,
      "eval_loss": 1.5439659357070923,
      "eval_runtime": 135.2487,
      "eval_samples_per_second": 11.009,
      "eval_steps_per_second": 2.758,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 8.521505376344086e-05,
      "loss": 1.527,
      "step": 110
    },
    {
      "epoch": 0.15,
      "eval_accuracy": 0.38616520166397095,
      "eval_loss": 1.540026068687439,
      "eval_runtime": 135.6976,
      "eval_samples_per_second": 10.973,
      "eval_steps_per_second": 2.749,
      "step": 110
    },
    {
      "epoch": 0.16,
      "learning_rate": 8.387096774193549e-05,
      "loss": 1.6481,
      "step": 120
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.32975152134895325,
      "eval_loss": 1.6678365468978882,
      "eval_runtime": 135.8659,
      "eval_samples_per_second": 10.959,
      "eval_steps_per_second": 2.745,
      "step": 120
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.252688172043011e-05,
      "loss": 1.7504,
      "step": 130
    },
    {
      "epoch": 0.17,
      "eval_accuracy": 0.29952988028526306,
      "eval_loss": 1.6077724695205688,
      "eval_runtime": 135.8936,
      "eval_samples_per_second": 10.957,
      "eval_steps_per_second": 2.745,
      "step": 130
    },
    {
      "epoch": 0.19,
      "learning_rate": 8.118279569892473e-05,
      "loss": 1.3748,
      "step": 140
    },
    {
      "epoch": 0.19,
      "eval_accuracy": 0.32505038380622864,
      "eval_loss": 1.5750231742858887,
      "eval_runtime": 135.7593,
      "eval_samples_per_second": 10.968,
      "eval_steps_per_second": 2.748,
      "step": 140
    },
    {
      "epoch": 0.2,
      "learning_rate": 7.983870967741936e-05,
      "loss": 1.6417,
      "step": 150
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.25990596413612366,
      "eval_loss": 1.7033889293670654,
      "eval_runtime": 140.3888,
      "eval_samples_per_second": 10.606,
      "eval_steps_per_second": 2.657,
      "step": 150
    },
    {
      "epoch": 0.21,
      "learning_rate": 7.849462365591398e-05,
      "loss": 1.6146,
      "step": 160
    },
    {
      "epoch": 0.21,
      "eval_accuracy": 0.35191404819488525,
      "eval_loss": 1.6161645650863647,
      "eval_runtime": 136.7498,
      "eval_samples_per_second": 10.889,
      "eval_steps_per_second": 2.728,
      "step": 160
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.715053763440861e-05,
      "loss": 1.4896,
      "step": 170
    },
    {
      "epoch": 0.23,
      "eval_accuracy": 0.37407657504081726,
      "eval_loss": 1.5245014429092407,
      "eval_runtime": 137.1791,
      "eval_samples_per_second": 10.854,
      "eval_steps_per_second": 2.719,
      "step": 170
    },
    {
      "epoch": 0.24,
      "learning_rate": 7.580645161290323e-05,
      "loss": 1.4278,
      "step": 180
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.24244458973407745,
      "eval_loss": 1.7536966800689697,
      "eval_runtime": 137.1089,
      "eval_samples_per_second": 10.86,
      "eval_steps_per_second": 2.72,
      "step": 180
    },
    {
      "epoch": 0.26,
      "learning_rate": 7.446236559139786e-05,
      "loss": 1.4475,
      "step": 190
    },
    {
      "epoch": 0.26,
      "eval_accuracy": 0.3881799876689911,
      "eval_loss": 1.4769032001495361,
      "eval_runtime": 136.7534,
      "eval_samples_per_second": 10.888,
      "eval_steps_per_second": 2.728,
      "step": 190
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.311827956989248e-05,
      "loss": 1.5416,
      "step": 200
    },
    {
      "epoch": 0.27,
      "eval_accuracy": 0.39489591121673584,
      "eval_loss": 1.4772460460662842,
      "eval_runtime": 137.0691,
      "eval_samples_per_second": 10.863,
      "eval_steps_per_second": 2.721,
      "step": 200
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.177419354838711e-05,
      "loss": 1.5997,
      "step": 210
    },
    {
      "epoch": 0.28,
      "eval_accuracy": 0.4278039038181305,
      "eval_loss": 1.4428460597991943,
      "eval_runtime": 137.003,
      "eval_samples_per_second": 10.868,
      "eval_steps_per_second": 2.723,
      "step": 210
    },
    {
      "epoch": 0.3,
      "learning_rate": 7.043010752688173e-05,
      "loss": 1.4337,
      "step": 220
    },
    {
      "epoch": 0.3,
      "eval_accuracy": 0.41235730051994324,
      "eval_loss": 1.435219645500183,
      "eval_runtime": 137.007,
      "eval_samples_per_second": 10.868,
      "eval_steps_per_second": 2.722,
      "step": 220
    },
    {
      "epoch": 0.31,
      "learning_rate": 6.908602150537635e-05,
      "loss": 1.415,
      "step": 230
    },
    {
      "epoch": 0.31,
      "eval_accuracy": 0.4157152473926544,
      "eval_loss": 1.4404770135879517,
      "eval_runtime": 137.0627,
      "eval_samples_per_second": 10.864,
      "eval_steps_per_second": 2.721,
      "step": 230
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.774193548387096e-05,
      "loss": 1.5196,
      "step": 240
    },
    {
      "epoch": 0.32,
      "eval_accuracy": 0.40429818630218506,
      "eval_loss": 1.4196510314941406,
      "eval_runtime": 136.8253,
      "eval_samples_per_second": 10.882,
      "eval_steps_per_second": 2.726,
      "step": 240
    },
    {
      "epoch": 0.34,
      "learning_rate": 6.63978494623656e-05,
      "loss": 1.3866,
      "step": 250
    },
    {
      "epoch": 0.34,
      "eval_accuracy": 0.37340497970581055,
      "eval_loss": 1.524086833000183,
      "eval_runtime": 137.3465,
      "eval_samples_per_second": 10.841,
      "eval_steps_per_second": 2.716,
      "step": 250
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.505376344086021e-05,
      "loss": 1.3041,
      "step": 260
    },
    {
      "epoch": 0.35,
      "eval_accuracy": 0.40429818630218506,
      "eval_loss": 1.5703184604644775,
      "eval_runtime": 136.9308,
      "eval_samples_per_second": 10.874,
      "eval_steps_per_second": 2.724,
      "step": 260
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.370967741935485e-05,
      "loss": 1.3618,
      "step": 270
    },
    {
      "epoch": 0.36,
      "eval_accuracy": 0.4284754991531372,
      "eval_loss": 1.3963350057601929,
      "eval_runtime": 136.0816,
      "eval_samples_per_second": 10.942,
      "eval_steps_per_second": 2.741,
      "step": 270
    },
    {
      "epoch": 0.38,
      "learning_rate": 6.236559139784946e-05,
      "loss": 1.3293,
      "step": 280
    },
    {
      "epoch": 0.38,
      "eval_accuracy": 0.4506380259990692,
      "eval_loss": 1.3478150367736816,
      "eval_runtime": 135.9673,
      "eval_samples_per_second": 10.951,
      "eval_steps_per_second": 2.743,
      "step": 280
    },
    {
      "epoch": 0.39,
      "learning_rate": 6.102150537634409e-05,
      "loss": 1.2215,
      "step": 290
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.3841504454612732,
      "eval_loss": 1.5994166135787964,
      "eval_runtime": 136.1871,
      "eval_samples_per_second": 10.933,
      "eval_steps_per_second": 2.739,
      "step": 290
    },
    {
      "epoch": 0.4,
      "learning_rate": 5.9677419354838715e-05,
      "loss": 1.6618,
      "step": 300
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.2276695817708969,
      "eval_loss": 1.7750705480575562,
      "eval_runtime": 136.1639,
      "eval_samples_per_second": 10.935,
      "eval_steps_per_second": 2.739,
      "step": 300
    },
    {
      "epoch": 0.42,
      "learning_rate": 5.8467741935483876e-05,
      "loss": 1.5349,
      "step": 310
    },
    {
      "epoch": 0.42,
      "eval_accuracy": 0.40362659096717834,
      "eval_loss": 1.6090513467788696,
      "eval_runtime": 136.1264,
      "eval_samples_per_second": 10.938,
      "eval_steps_per_second": 2.74,
      "step": 310
    },
    {
      "epoch": 0.43,
      "learning_rate": 5.71236559139785e-05,
      "loss": 1.4037,
      "step": 320
    },
    {
      "epoch": 0.43,
      "eval_accuracy": 0.4445936977863312,
      "eval_loss": 1.4741053581237793,
      "eval_runtime": 136.0584,
      "eval_samples_per_second": 10.944,
      "eval_steps_per_second": 2.741,
      "step": 320
    },
    {
      "epoch": 0.44,
      "learning_rate": 5.577956989247311e-05,
      "loss": 1.4844,
      "step": 330
    },
    {
      "epoch": 0.44,
      "eval_accuracy": 0.4398925304412842,
      "eval_loss": 1.4170390367507935,
      "eval_runtime": 136.1183,
      "eval_samples_per_second": 10.939,
      "eval_steps_per_second": 2.74,
      "step": 330
    },
    {
      "epoch": 0.46,
      "learning_rate": 5.443548387096774e-05,
      "loss": 1.2806,
      "step": 340
    },
    {
      "epoch": 0.46,
      "eval_accuracy": 0.5050369501113892,
      "eval_loss": 1.2886841297149658,
      "eval_runtime": 136.1309,
      "eval_samples_per_second": 10.938,
      "eval_steps_per_second": 2.74,
      "step": 340
    },
    {
      "epoch": 0.47,
      "learning_rate": 5.309139784946236e-05,
      "loss": 1.3818,
      "step": 350
    },
    {
      "epoch": 0.47,
      "eval_accuracy": 0.501679003238678,
      "eval_loss": 1.2668293714523315,
      "eval_runtime": 135.9509,
      "eval_samples_per_second": 10.952,
      "eval_steps_per_second": 2.744,
      "step": 350
    },
    {
      "epoch": 0.48,
      "learning_rate": 5.174731182795699e-05,
      "loss": 1.3491,
      "step": 360
    },
    {
      "epoch": 0.48,
      "eval_accuracy": 0.4593687057495117,
      "eval_loss": 1.4720617532730103,
      "eval_runtime": 136.1018,
      "eval_samples_per_second": 10.94,
      "eval_steps_per_second": 2.741,
      "step": 360
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.040322580645161e-05,
      "loss": 1.2347,
      "step": 370
    },
    {
      "epoch": 0.5,
      "eval_accuracy": 0.5245131254196167,
      "eval_loss": 1.2187544107437134,
      "eval_runtime": 136.1642,
      "eval_samples_per_second": 10.935,
      "eval_steps_per_second": 2.739,
      "step": 370
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.905913978494624e-05,
      "loss": 1.2182,
      "step": 380
    },
    {
      "epoch": 0.51,
      "eval_accuracy": 0.45668232440948486,
      "eval_loss": 1.3813459873199463,
      "eval_runtime": 136.1044,
      "eval_samples_per_second": 10.94,
      "eval_steps_per_second": 2.741,
      "step": 380
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.771505376344086e-05,
      "loss": 1.2513,
      "step": 390
    },
    {
      "epoch": 0.52,
      "eval_accuracy": 0.5204835534095764,
      "eval_loss": 1.2110750675201416,
      "eval_runtime": 136.1563,
      "eval_samples_per_second": 10.936,
      "eval_steps_per_second": 2.739,
      "step": 390
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.637096774193548e-05,
      "loss": 1.2447,
      "step": 400
    },
    {
      "epoch": 0.54,
      "eval_accuracy": 0.546004056930542,
      "eval_loss": 1.2230509519577026,
      "eval_runtime": 136.1489,
      "eval_samples_per_second": 10.937,
      "eval_steps_per_second": 2.74,
      "step": 400
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.516129032258064e-05,
      "loss": 1.038,
      "step": 410
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.5372733473777771,
      "eval_loss": 1.2562698125839233,
      "eval_runtime": 136.2823,
      "eval_samples_per_second": 10.926,
      "eval_steps_per_second": 2.737,
      "step": 410
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.381720430107527e-05,
      "loss": 1.2409,
      "step": 420
    },
    {
      "epoch": 0.56,
      "eval_accuracy": 0.4936198890209198,
      "eval_loss": 1.344766616821289,
      "eval_runtime": 136.1885,
      "eval_samples_per_second": 10.933,
      "eval_steps_per_second": 2.739,
      "step": 420
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.247311827956989e-05,
      "loss": 1.2279,
      "step": 430
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.5486903786659241,
      "eval_loss": 1.1971595287322998,
      "eval_runtime": 136.416,
      "eval_samples_per_second": 10.915,
      "eval_steps_per_second": 2.734,
      "step": 430
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.112903225806452e-05,
      "loss": 1.3256,
      "step": 440
    },
    {
      "epoch": 0.59,
      "eval_accuracy": 0.5742108821868896,
      "eval_loss": 1.1706324815750122,
      "eval_runtime": 136.371,
      "eval_samples_per_second": 10.919,
      "eval_steps_per_second": 2.735,
      "step": 440
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.978494623655914e-05,
      "loss": 1.2866,
      "step": 450
    },
    {
      "epoch": 0.6,
      "eval_accuracy": 0.5003358125686646,
      "eval_loss": 1.309117078781128,
      "eval_runtime": 136.1709,
      "eval_samples_per_second": 10.935,
      "eval_steps_per_second": 2.739,
      "step": 450
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.844086021505376e-05,
      "loss": 1.0574,
      "step": 460
    },
    {
      "epoch": 0.62,
      "eval_accuracy": 0.5500335693359375,
      "eval_loss": 1.2074663639068604,
      "eval_runtime": 136.5936,
      "eval_samples_per_second": 10.901,
      "eval_steps_per_second": 2.731,
      "step": 460
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.7096774193548386e-05,
      "loss": 1.2744,
      "step": 470
    },
    {
      "epoch": 0.63,
      "eval_accuracy": 0.5171256065368652,
      "eval_loss": 1.2830621004104614,
      "eval_runtime": 136.8418,
      "eval_samples_per_second": 10.881,
      "eval_steps_per_second": 2.726,
      "step": 470
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.575268817204301e-05,
      "loss": 1.0836,
      "step": 480
    },
    {
      "epoch": 0.64,
      "eval_accuracy": 0.5607790350914001,
      "eval_loss": 1.1768107414245605,
      "eval_runtime": 136.4906,
      "eval_samples_per_second": 10.909,
      "eval_steps_per_second": 2.733,
      "step": 480
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.4408602150537636e-05,
      "loss": 1.135,
      "step": 490
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.5775688290596008,
      "eval_loss": 1.1407707929611206,
      "eval_runtime": 136.5812,
      "eval_samples_per_second": 10.902,
      "eval_steps_per_second": 2.731,
      "step": 490
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.306451612903226e-05,
      "loss": 1.1303,
      "step": 500
    },
    {
      "epoch": 0.67,
      "eval_accuracy": 0.5540631413459778,
      "eval_loss": 1.2319557666778564,
      "eval_runtime": 136.447,
      "eval_samples_per_second": 10.913,
      "eval_steps_per_second": 2.734,
      "step": 500
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.172043010752688e-05,
      "loss": 1.2068,
      "step": 510
    },
    {
      "epoch": 0.69,
      "eval_accuracy": 0.5795835852622986,
      "eval_loss": 1.1379237174987793,
      "eval_runtime": 139.7377,
      "eval_samples_per_second": 10.656,
      "eval_steps_per_second": 2.669,
      "step": 510
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.0376344086021508e-05,
      "loss": 1.1347,
      "step": 520
    },
    {
      "epoch": 0.7,
      "eval_accuracy": 0.5896574854850769,
      "eval_loss": 1.112443447113037,
      "eval_runtime": 135.8322,
      "eval_samples_per_second": 10.962,
      "eval_steps_per_second": 2.746,
      "step": 520
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.9032258064516133e-05,
      "loss": 1.1846,
      "step": 530
    },
    {
      "epoch": 0.71,
      "eval_accuracy": 0.5802552103996277,
      "eval_loss": 1.1337865591049194,
      "eval_runtime": 136.0081,
      "eval_samples_per_second": 10.948,
      "eval_steps_per_second": 2.742,
      "step": 530
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.768817204301075e-05,
      "loss": 1.2409,
      "step": 540
    },
    {
      "epoch": 0.73,
      "eval_accuracy": 0.5789120197296143,
      "eval_loss": 1.1259396076202393,
      "eval_runtime": 136.2241,
      "eval_samples_per_second": 10.931,
      "eval_steps_per_second": 2.738,
      "step": 540
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.6344086021505376e-05,
      "loss": 1.0664,
      "step": 550
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.6037608981132507,
      "eval_loss": 1.065294861793518,
      "eval_runtime": 136.1781,
      "eval_samples_per_second": 10.934,
      "eval_steps_per_second": 2.739,
      "step": 550
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.5e-05,
      "loss": 1.1637,
      "step": 560
    },
    {
      "epoch": 0.75,
      "eval_accuracy": 0.5977165699005127,
      "eval_loss": 1.0549540519714355,
      "eval_runtime": 135.9124,
      "eval_samples_per_second": 10.956,
      "eval_steps_per_second": 2.744,
      "step": 560
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.3655913978494626e-05,
      "loss": 1.0707,
      "step": 570
    },
    {
      "epoch": 0.77,
      "eval_accuracy": 0.5715245008468628,
      "eval_loss": 1.0996488332748413,
      "eval_runtime": 136.1326,
      "eval_samples_per_second": 10.938,
      "eval_steps_per_second": 2.74,
      "step": 570
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.2311827956989248e-05,
      "loss": 1.2258,
      "step": 580
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.5977165699005127,
      "eval_loss": 1.080415964126587,
      "eval_runtime": 135.8671,
      "eval_samples_per_second": 10.959,
      "eval_steps_per_second": 2.745,
      "step": 580
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.0967741935483873e-05,
      "loss": 0.9256,
      "step": 590
    },
    {
      "epoch": 0.79,
      "eval_accuracy": 0.580926775932312,
      "eval_loss": 1.1501046419143677,
      "eval_runtime": 135.8439,
      "eval_samples_per_second": 10.961,
      "eval_steps_per_second": 2.746,
      "step": 590
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.9623655913978494e-05,
      "loss": 1.1542,
      "step": 600
    },
    {
      "epoch": 0.81,
      "eval_accuracy": 0.5957018136978149,
      "eval_loss": 1.1089370250701904,
      "eval_runtime": 137.3702,
      "eval_samples_per_second": 10.839,
      "eval_steps_per_second": 2.715,
      "step": 600
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.827956989247312e-05,
      "loss": 1.3931,
      "step": 610
    },
    {
      "epoch": 0.82,
      "eval_accuracy": 0.5856279134750366,
      "eval_loss": 1.138110876083374,
      "eval_runtime": 135.8089,
      "eval_samples_per_second": 10.964,
      "eval_steps_per_second": 2.747,
      "step": 610
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.693548387096774e-05,
      "loss": 1.1117,
      "step": 620
    },
    {
      "epoch": 0.83,
      "eval_accuracy": 0.6030893325805664,
      "eval_loss": 1.0933294296264648,
      "eval_runtime": 135.7612,
      "eval_samples_per_second": 10.968,
      "eval_steps_per_second": 2.747,
      "step": 620
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.5591397849462366e-05,
      "loss": 1.1433,
      "step": 630
    },
    {
      "epoch": 0.85,
      "eval_accuracy": 0.6218938827514648,
      "eval_loss": 1.0175174474716187,
      "eval_runtime": 135.6802,
      "eval_samples_per_second": 10.974,
      "eval_steps_per_second": 2.749,
      "step": 630
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.4247311827956991e-05,
      "loss": 1.0325,
      "step": 640
    },
    {
      "epoch": 0.86,
      "eval_accuracy": 0.6239086389541626,
      "eval_loss": 0.9885073304176331,
      "eval_runtime": 135.5378,
      "eval_samples_per_second": 10.986,
      "eval_steps_per_second": 2.752,
      "step": 640
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2903225806451613e-05,
      "loss": 1.111,
      "step": 650
    },
    {
      "epoch": 0.87,
      "eval_accuracy": 0.6259234547615051,
      "eval_loss": 1.004755973815918,
      "eval_runtime": 135.7521,
      "eval_samples_per_second": 10.969,
      "eval_steps_per_second": 2.748,
      "step": 650
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.1559139784946236e-05,
      "loss": 0.8125,
      "step": 660
    },
    {
      "epoch": 0.89,
      "eval_accuracy": 0.6165211796760559,
      "eval_loss": 1.0176496505737305,
      "eval_runtime": 135.5622,
      "eval_samples_per_second": 10.984,
      "eval_steps_per_second": 2.752,
      "step": 660
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.0215053763440861e-05,
      "loss": 1.0414,
      "step": 670
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.6185359358787537,
      "eval_loss": 1.0289984941482544,
      "eval_runtime": 135.9417,
      "eval_samples_per_second": 10.953,
      "eval_steps_per_second": 2.744,
      "step": 670
    },
    {
      "epoch": 0.91,
      "learning_rate": 8.870967741935484e-06,
      "loss": 1.0037,
      "step": 680
    },
    {
      "epoch": 0.91,
      "eval_accuracy": 0.625251829624176,
      "eval_loss": 1.0268802642822266,
      "eval_runtime": 135.7529,
      "eval_samples_per_second": 10.968,
      "eval_steps_per_second": 2.748,
      "step": 680
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.526881720430108e-06,
      "loss": 0.9406,
      "step": 690
    },
    {
      "epoch": 0.93,
      "eval_accuracy": 0.6272666454315186,
      "eval_loss": 1.0300624370574951,
      "eval_runtime": 137.5757,
      "eval_samples_per_second": 10.823,
      "eval_steps_per_second": 2.711,
      "step": 690
    },
    {
      "epoch": 0.94,
      "learning_rate": 6.182795698924732e-06,
      "loss": 1.0129,
      "step": 700
    },
    {
      "epoch": 0.94,
      "eval_accuracy": 0.6326393485069275,
      "eval_loss": 1.0238244533538818,
      "eval_runtime": 135.84,
      "eval_samples_per_second": 10.961,
      "eval_steps_per_second": 2.746,
      "step": 700
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.838709677419355e-06,
      "loss": 1.2213,
      "step": 710
    },
    {
      "epoch": 0.95,
      "eval_accuracy": 0.6272666454315186,
      "eval_loss": 1.018078088760376,
      "eval_runtime": 135.9017,
      "eval_samples_per_second": 10.956,
      "eval_steps_per_second": 2.745,
      "step": 710
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.4946236559139785e-06,
      "loss": 1.2519,
      "step": 720
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.6265950202941895,
      "eval_loss": 1.0160512924194336,
      "eval_runtime": 135.809,
      "eval_samples_per_second": 10.964,
      "eval_steps_per_second": 2.747,
      "step": 720
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.1505376344086023e-06,
      "loss": 0.9932,
      "step": 730
    },
    {
      "epoch": 0.98,
      "eval_accuracy": 0.6279382109642029,
      "eval_loss": 1.0112457275390625,
      "eval_runtime": 136.0311,
      "eval_samples_per_second": 10.946,
      "eval_steps_per_second": 2.742,
      "step": 730
    },
    {
      "epoch": 0.99,
      "learning_rate": 8.064516129032258e-07,
      "loss": 1.0135,
      "step": 740
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.6312961578369141,
      "eval_loss": 1.010461449623108,
      "eval_runtime": 136.2447,
      "eval_samples_per_second": 10.929,
      "eval_steps_per_second": 2.738,
      "step": 740
    },
    {
      "epoch": 1.0,
      "step": 744,
      "total_flos": 1.673551838186588e+17,
      "train_loss": 1.320114940725347,
      "train_runtime": 11616.0305,
      "train_samples_per_second": 0.512,
      "train_steps_per_second": 0.064
    }
  ],
  "max_steps": 744,
  "num_train_epochs": 1,
  "total_flos": 1.673551838186588e+17,
  "trial_name": null,
  "trial_params": null
}