|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.57446808510638, |
|
"eval_steps": 500, |
|
"global_step": 675, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9999906714572185e-05, |
|
"loss": 2.0607, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.999766789911305e-05, |
|
"loss": 2.0068, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.999067203154777e-05, |
|
"loss": 1.8243, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6684231758117676, |
|
"eval_runtime": 4.0605, |
|
"eval_samples_per_second": 20.441, |
|
"eval_steps_per_second": 1.478, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.9979013702509664e-05, |
|
"loss": 1.7052, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.99626950870707e-05, |
|
"loss": 1.6022, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.994171922976348e-05, |
|
"loss": 1.5291, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.400346040725708, |
|
"eval_runtime": 3.9709, |
|
"eval_samples_per_second": 20.902, |
|
"eval_steps_per_second": 1.511, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.991609004401324e-05, |
|
"loss": 1.4168, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.988581231140772e-05, |
|
"loss": 1.312, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.985089168080509e-05, |
|
"loss": 1.2355, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.1807862520217896, |
|
"eval_runtime": 3.9823, |
|
"eval_samples_per_second": 20.842, |
|
"eval_steps_per_second": 1.507, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.981133466728004e-05, |
|
"loss": 1.1754, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.976714865090827e-05, |
|
"loss": 1.1393, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 1.0949289798736572, |
|
"eval_runtime": 3.9683, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 1.512, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.9718341875389625e-05, |
|
"loss": 1.123, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 4.966492344651005e-05, |
|
"loss": 1.0919, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.960690333044279e-05, |
|
"loss": 1.0659, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 1.045737624168396, |
|
"eval_runtime": 3.9807, |
|
"eval_samples_per_second": 20.85, |
|
"eval_steps_per_second": 1.507, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.9544292351888966e-05, |
|
"loss": 1.0601, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 4.947710219205808e-05, |
|
"loss": 1.0387, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 4.9405345386488614e-05, |
|
"loss": 1.0196, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 1.0065257549285889, |
|
"eval_runtime": 3.9679, |
|
"eval_samples_per_second": 20.918, |
|
"eval_steps_per_second": 1.512, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 4.9329035322709386e-05, |
|
"loss": 0.9975, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 4.924818623774178e-05, |
|
"loss": 0.9831, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 0.9685614109039307, |
|
"eval_runtime": 3.9721, |
|
"eval_samples_per_second": 20.896, |
|
"eval_steps_per_second": 1.511, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.916281321544362e-05, |
|
"loss": 0.9791, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 4.907293218369499e-05, |
|
"loss": 0.9494, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 4.897855991142658e-05, |
|
"loss": 0.9281, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 0.9254654049873352, |
|
"eval_runtime": 3.9691, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 1.512, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 4.88797140054912e-05, |
|
"loss": 0.9186, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 4.877641290737884e-05, |
|
"loss": 0.8878, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 4.8668675889776095e-05, |
|
"loss": 0.8678, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 0.8814197182655334, |
|
"eval_runtime": 3.978, |
|
"eval_samples_per_second": 20.865, |
|
"eval_steps_per_second": 1.508, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 4.855652305297052e-05, |
|
"loss": 0.8535, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 4.843997532110051e-05, |
|
"loss": 0.816, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.831905443825159e-05, |
|
"loss": 0.8054, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 0.8274821639060974, |
|
"eval_runtime": 3.9679, |
|
"eval_samples_per_second": 20.918, |
|
"eval_steps_per_second": 1.512, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 4.819378296439961e-05, |
|
"loss": 0.7717, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 4.806418427120179e-05, |
|
"loss": 0.7683, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"eval_loss": 0.7860919237136841, |
|
"eval_runtime": 3.9806, |
|
"eval_samples_per_second": 20.851, |
|
"eval_steps_per_second": 1.507, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 4.793028253763633e-05, |
|
"loss": 0.741, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 4.779210274549134e-05, |
|
"loss": 0.7057, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 4.76496706747041e-05, |
|
"loss": 0.6906, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"eval_loss": 0.7272327542304993, |
|
"eval_runtime": 3.9685, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 1.512, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 4.750301289855128e-05, |
|
"loss": 0.673, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 4.735215677869128e-05, |
|
"loss": 0.6411, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 4.719713046005938e-05, |
|
"loss": 0.6246, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"eval_loss": 0.6794944405555725, |
|
"eval_runtime": 3.9823, |
|
"eval_samples_per_second": 20.842, |
|
"eval_steps_per_second": 1.507, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 4.703796286561679e-05, |
|
"loss": 0.6086, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 4.687468369095457e-05, |
|
"loss": 0.5813, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 0.6364239454269409, |
|
"eval_runtime": 3.9657, |
|
"eval_samples_per_second": 20.93, |
|
"eval_steps_per_second": 1.513, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 4.6707323398753346e-05, |
|
"loss": 0.5586, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 4.65359132131e-05, |
|
"loss": 0.5556, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 4.6360485113662216e-05, |
|
"loss": 0.5253, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 0.6078140139579773, |
|
"eval_runtime": 3.9649, |
|
"eval_samples_per_second": 20.934, |
|
"eval_steps_per_second": 1.513, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 4.618107182972209e-05, |
|
"loss": 0.5329, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 4.599770683406991e-05, |
|
"loss": 0.4948, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 4.581042433675921e-05, |
|
"loss": 0.5149, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_loss": 0.5811336636543274, |
|
"eval_runtime": 3.9681, |
|
"eval_samples_per_second": 20.917, |
|
"eval_steps_per_second": 1.512, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 4.5619259278724214e-05, |
|
"loss": 0.4857, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 4.542424732526105e-05, |
|
"loss": 0.4949, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"eval_loss": 0.5605343580245972, |
|
"eval_runtime": 3.9813, |
|
"eval_samples_per_second": 20.847, |
|
"eval_steps_per_second": 1.507, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 4.522542485937369e-05, |
|
"loss": 0.4737, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 4.5022828974986044e-05, |
|
"loss": 0.4675, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 4.4816497470021454e-05, |
|
"loss": 0.4644, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"eval_loss": 0.5462371110916138, |
|
"eval_runtime": 3.9643, |
|
"eval_samples_per_second": 20.937, |
|
"eval_steps_per_second": 1.514, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 4.4606468839350785e-05, |
|
"loss": 0.4565, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 4.43927822676105e-05, |
|
"loss": 0.4365, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 4.417547762189207e-05, |
|
"loss": 0.458, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"eval_loss": 0.5346133708953857, |
|
"eval_runtime": 3.9682, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 1.512, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 4.395459544430407e-05, |
|
"loss": 0.449, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 4.373017694440827e-05, |
|
"loss": 0.4401, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 4.35022639915313e-05, |
|
"loss": 0.4294, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_loss": 0.5201942920684814, |
|
"eval_runtime": 3.9685, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 1.512, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 4.3270899106953105e-05, |
|
"loss": 0.4367, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 4.3036125455973896e-05, |
|
"loss": 0.4143, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"eval_loss": 0.5177348852157593, |
|
"eval_runtime": 3.9827, |
|
"eval_samples_per_second": 20.84, |
|
"eval_steps_per_second": 1.507, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 4.279798683986084e-05, |
|
"loss": 0.4244, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 4.2556527687676186e-05, |
|
"loss": 0.4096, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 4.231179304798815e-05, |
|
"loss": 0.4161, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"eval_loss": 0.5107729434967041, |
|
"eval_runtime": 3.9674, |
|
"eval_samples_per_second": 20.92, |
|
"eval_steps_per_second": 1.512, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"learning_rate": 4.206382858046636e-05, |
|
"loss": 0.4075, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 4.181268054736318e-05, |
|
"loss": 0.4002, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 4.1558395804882695e-05, |
|
"loss": 0.4128, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"eval_loss": 0.5056843757629395, |
|
"eval_runtime": 3.9783, |
|
"eval_samples_per_second": 20.863, |
|
"eval_steps_per_second": 1.508, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 4.130102179443877e-05, |
|
"loss": 0.3928, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 4.1040606533804024e-05, |
|
"loss": 0.4055, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 23.57, |
|
"eval_loss": 0.5070647597312927, |
|
"eval_runtime": 3.9713, |
|
"eval_samples_per_second": 20.9, |
|
"eval_steps_per_second": 1.511, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 4.077719860815132e-05, |
|
"loss": 0.4021, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 4.051084716098921e-05, |
|
"loss": 0.3999, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 24.47, |
|
"learning_rate": 4.0241601884993366e-05, |
|
"loss": 0.3937, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"eval_loss": 0.505768895149231, |
|
"eval_runtime": 3.9783, |
|
"eval_samples_per_second": 20.863, |
|
"eval_steps_per_second": 1.508, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 3.996951301273557e-05, |
|
"loss": 0.3927, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 3.969463130731183e-05, |
|
"loss": 0.377, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"learning_rate": 3.941700805287168e-05, |
|
"loss": 0.3967, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"eval_loss": 0.5016723871231079, |
|
"eval_runtime": 3.9654, |
|
"eval_samples_per_second": 20.931, |
|
"eval_steps_per_second": 1.513, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 3.913669504505015e-05, |
|
"loss": 0.3859, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 26.38, |
|
"learning_rate": 3.885374458130438e-05, |
|
"loss": 0.3754, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"eval_loss": 0.4997941851615906, |
|
"eval_runtime": 3.9766, |
|
"eval_samples_per_second": 20.872, |
|
"eval_steps_per_second": 1.509, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 3.856820945115655e-05, |
|
"loss": 0.3853, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 3.828014292634509e-05, |
|
"loss": 0.3852, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 3.798959875088584e-05, |
|
"loss": 0.3742, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 27.57, |
|
"eval_loss": 0.5018514394760132, |
|
"eval_runtime": 3.9719, |
|
"eval_samples_per_second": 20.897, |
|
"eval_steps_per_second": 1.511, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 3.769663113104516e-05, |
|
"loss": 0.3706, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 3.74012947252267e-05, |
|
"loss": 0.3759, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 28.51, |
|
"learning_rate": 3.7103644633774014e-05, |
|
"loss": 0.3756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 28.55, |
|
"eval_loss": 0.5018544793128967, |
|
"eval_runtime": 3.9835, |
|
"eval_samples_per_second": 20.836, |
|
"eval_steps_per_second": 1.506, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 29.15, |
|
"learning_rate": 3.680373638869047e-05, |
|
"loss": 0.3688, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 29.36, |
|
"learning_rate": 3.6501625943278805e-05, |
|
"loss": 0.3764, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 3.619736966170205e-05, |
|
"loss": 0.3652, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"eval_loss": 0.5060749650001526, |
|
"eval_runtime": 3.9634, |
|
"eval_samples_per_second": 20.942, |
|
"eval_steps_per_second": 1.514, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"learning_rate": 3.589102430846773e-05, |
|
"loss": 0.3688, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 3.5582647037837445e-05, |
|
"loss": 0.3597, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 30.55, |
|
"eval_loss": 0.5076042413711548, |
|
"eval_runtime": 3.9772, |
|
"eval_samples_per_second": 20.869, |
|
"eval_steps_per_second": 1.509, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 31.06, |
|
"learning_rate": 3.527229538316371e-05, |
|
"loss": 0.3635, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 31.28, |
|
"learning_rate": 3.496002724615604e-05, |
|
"loss": 0.3564, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 31.49, |
|
"learning_rate": 3.464590088607839e-05, |
|
"loss": 0.3609, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 31.57, |
|
"eval_loss": 0.5078853964805603, |
|
"eval_runtime": 3.9577, |
|
"eval_samples_per_second": 20.972, |
|
"eval_steps_per_second": 1.516, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"learning_rate": 3.4329974908879783e-05, |
|
"loss": 0.3679, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"learning_rate": 3.401230825626037e-05, |
|
"loss": 0.3498, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"learning_rate": 3.369296019467473e-05, |
|
"loss": 0.3581, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"eval_loss": 0.5108149647712708, |
|
"eval_runtime": 3.9725, |
|
"eval_samples_per_second": 20.894, |
|
"eval_steps_per_second": 1.51, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 33.19, |
|
"learning_rate": 3.3371990304274656e-05, |
|
"loss": 0.3623, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"learning_rate": 3.304945846779346e-05, |
|
"loss": 0.3426, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 33.57, |
|
"eval_loss": 0.511660635471344, |
|
"eval_runtime": 3.9737, |
|
"eval_samples_per_second": 20.887, |
|
"eval_steps_per_second": 1.51, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 34.04, |
|
"learning_rate": 3.272542485937369e-05, |
|
"loss": 0.3469, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"learning_rate": 3.239994993334059e-05, |
|
"loss": 0.3513, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 34.47, |
|
"learning_rate": 3.207309441292325e-05, |
|
"loss": 0.3481, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"eval_loss": 0.5140624046325684, |
|
"eval_runtime": 3.9754, |
|
"eval_samples_per_second": 20.878, |
|
"eval_steps_per_second": 1.509, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 3.1744919278925605e-05, |
|
"loss": 0.3408, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 35.32, |
|
"learning_rate": 3.1415485758349346e-05, |
|
"loss": 0.3469, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 35.53, |
|
"learning_rate": 3.1084855312970896e-05, |
|
"loss": 0.3435, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"eval_loss": 0.5150405168533325, |
|
"eval_runtime": 3.9656, |
|
"eval_samples_per_second": 20.93, |
|
"eval_steps_per_second": 1.513, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 36.17, |
|
"learning_rate": 3.075308962787466e-05, |
|
"loss": 0.3419, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 3.0420250599944523e-05, |
|
"loss": 0.3317, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 0.524531900882721, |
|
"eval_runtime": 3.975, |
|
"eval_samples_per_second": 20.881, |
|
"eval_steps_per_second": 1.509, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"learning_rate": 3.008640032631585e-05, |
|
"loss": 0.342, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 37.23, |
|
"learning_rate": 2.9751601092790184e-05, |
|
"loss": 0.3352, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 37.45, |
|
"learning_rate": 2.9415915362214692e-05, |
|
"loss": 0.3387, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 37.57, |
|
"eval_loss": 0.5238548517227173, |
|
"eval_runtime": 3.9677, |
|
"eval_samples_per_second": 20.919, |
|
"eval_steps_per_second": 1.512, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 38.09, |
|
"learning_rate": 2.907940576282856e-05, |
|
"loss": 0.3257, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 38.3, |
|
"learning_rate": 2.874213507657861e-05, |
|
"loss": 0.3316, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 38.51, |
|
"learning_rate": 2.840416622740617e-05, |
|
"loss": 0.332, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 38.55, |
|
"eval_loss": 0.5318763852119446, |
|
"eval_runtime": 3.9794, |
|
"eval_samples_per_second": 20.858, |
|
"eval_steps_per_second": 1.508, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"learning_rate": 2.8065562269507463e-05, |
|
"loss": 0.3178, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 39.36, |
|
"learning_rate": 2.7726386375569748e-05, |
|
"loss": 0.3287, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"learning_rate": 2.7386701824985255e-05, |
|
"loss": 0.3334, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"eval_loss": 0.5342416167259216, |
|
"eval_runtime": 3.9681, |
|
"eval_samples_per_second": 20.917, |
|
"eval_steps_per_second": 1.512, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 40.21, |
|
"learning_rate": 2.7046571992045334e-05, |
|
"loss": 0.3303, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 2.6706060334116777e-05, |
|
"loss": 0.323, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 40.55, |
|
"eval_loss": 0.538831353187561, |
|
"eval_runtime": 3.9807, |
|
"eval_samples_per_second": 20.851, |
|
"eval_steps_per_second": 1.507, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 41.06, |
|
"learning_rate": 2.636523037980275e-05, |
|
"loss": 0.3103, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 41.28, |
|
"learning_rate": 2.6024145717090358e-05, |
|
"loss": 0.3232, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 41.49, |
|
"learning_rate": 2.5682869981487152e-05, |
|
"loss": 0.3144, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 41.57, |
|
"eval_loss": 0.5423159599304199, |
|
"eval_runtime": 3.9774, |
|
"eval_samples_per_second": 20.868, |
|
"eval_steps_per_second": 1.509, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"learning_rate": 2.5341466844148775e-05, |
|
"loss": 0.3109, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 42.34, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3162, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 42.55, |
|
"learning_rate": 2.4658533155851228e-05, |
|
"loss": 0.3092, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 42.55, |
|
"eval_loss": 0.54653400182724, |
|
"eval_runtime": 3.9802, |
|
"eval_samples_per_second": 20.853, |
|
"eval_steps_per_second": 1.507, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 43.19, |
|
"learning_rate": 2.431713001851286e-05, |
|
"loss": 0.3175, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 2.3975854282909644e-05, |
|
"loss": 0.3084, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 43.57, |
|
"eval_loss": 0.5480648875236511, |
|
"eval_runtime": 3.9672, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 1.512, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 44.04, |
|
"learning_rate": 2.3634769620197254e-05, |
|
"loss": 0.3039, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 44.26, |
|
"learning_rate": 2.329393966588323e-05, |
|
"loss": 0.2977, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 44.47, |
|
"learning_rate": 2.295342800795468e-05, |
|
"loss": 0.3091, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 44.55, |
|
"eval_loss": 0.5604838728904724, |
|
"eval_runtime": 3.9783, |
|
"eval_samples_per_second": 20.863, |
|
"eval_steps_per_second": 1.508, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 45.11, |
|
"learning_rate": 2.261329817501475e-05, |
|
"loss": 0.3087, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 45.32, |
|
"learning_rate": 2.2273613624430255e-05, |
|
"loss": 0.2994, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 45.53, |
|
"learning_rate": 2.1934437730492543e-05, |
|
"loss": 0.3044, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"eval_loss": 0.560636579990387, |
|
"eval_runtime": 3.9626, |
|
"eval_samples_per_second": 20.946, |
|
"eval_steps_per_second": 1.514, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 46.17, |
|
"learning_rate": 2.159583377259384e-05, |
|
"loss": 0.2867, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"learning_rate": 2.1257864923421404e-05, |
|
"loss": 0.303, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"eval_loss": 0.5683414340019226, |
|
"eval_runtime": 3.9884, |
|
"eval_samples_per_second": 20.81, |
|
"eval_steps_per_second": 1.504, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"learning_rate": 2.092059423717145e-05, |
|
"loss": 0.2971, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 47.23, |
|
"learning_rate": 2.0584084637785317e-05, |
|
"loss": 0.2923, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 2.0248398907209826e-05, |
|
"loss": 0.2896, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 47.57, |
|
"eval_loss": 0.572201669216156, |
|
"eval_runtime": 3.9766, |
|
"eval_samples_per_second": 20.872, |
|
"eval_steps_per_second": 1.509, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 48.09, |
|
"learning_rate": 1.991359967368416e-05, |
|
"loss": 0.2963, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 48.3, |
|
"learning_rate": 1.957974940005548e-05, |
|
"loss": 0.2849, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 48.51, |
|
"learning_rate": 1.9246910372125342e-05, |
|
"loss": 0.2854, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 48.55, |
|
"eval_loss": 0.5778339505195618, |
|
"eval_runtime": 3.9935, |
|
"eval_samples_per_second": 20.784, |
|
"eval_steps_per_second": 1.502, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"learning_rate": 1.8915144687029106e-05, |
|
"loss": 0.2884, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 49.36, |
|
"learning_rate": 1.8584514241650666e-05, |
|
"loss": 0.2767, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"learning_rate": 1.825508072107439e-05, |
|
"loss": 0.291, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"eval_loss": 0.5825899839401245, |
|
"eval_runtime": 3.9815, |
|
"eval_samples_per_second": 20.846, |
|
"eval_steps_per_second": 1.507, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"step": 675, |
|
"total_flos": 6306831714484224.0, |
|
"train_loss": 0.5519325028525458, |
|
"train_runtime": 6501.4714, |
|
"train_samples_per_second": 5.737, |
|
"train_steps_per_second": 0.177 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1150, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 6306831714484224.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|