|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 38, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6941, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.062219502243982046, |
|
"eval_loss": 2.654296875, |
|
"eval_runtime": 3.4086, |
|
"eval_samples_per_second": 16.722, |
|
"eval_steps_per_second": 1.173, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6914, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.062219502243982046, |
|
"eval_loss": 2.654296875, |
|
"eval_runtime": 2.4357, |
|
"eval_samples_per_second": 23.402, |
|
"eval_steps_per_second": 1.642, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.994876739510005e-05, |
|
"loss": 2.6003, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.06265016546534294, |
|
"eval_loss": 2.6015625, |
|
"eval_runtime": 2.3986, |
|
"eval_samples_per_second": 23.763, |
|
"eval_steps_per_second": 1.668, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9795419551040836e-05, |
|
"loss": 2.5603, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.0627181649213473, |
|
"eval_loss": 2.5703125, |
|
"eval_runtime": 2.5872, |
|
"eval_samples_per_second": 22.032, |
|
"eval_steps_per_second": 1.546, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.9541003989089956e-05, |
|
"loss": 2.6067, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.06289949680402557, |
|
"eval_loss": 2.55078125, |
|
"eval_runtime": 2.7942, |
|
"eval_samples_per_second": 20.4, |
|
"eval_steps_per_second": 1.432, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.9187258625509518e-05, |
|
"loss": 2.5439, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.0626954984360125, |
|
"eval_loss": 2.546875, |
|
"eval_runtime": 2.8026, |
|
"eval_samples_per_second": 20.338, |
|
"eval_steps_per_second": 1.427, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.873659989982586e-05, |
|
"loss": 2.4459, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.06289949680402557, |
|
"eval_loss": 2.548828125, |
|
"eval_runtime": 2.3868, |
|
"eval_samples_per_second": 23.881, |
|
"eval_steps_per_second": 1.676, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8192106268097336e-05, |
|
"loss": 2.5439, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.06278616437735164, |
|
"eval_loss": 2.548828125, |
|
"eval_runtime": 3.0044, |
|
"eval_samples_per_second": 18.972, |
|
"eval_steps_per_second": 1.331, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.7557497173937928e-05, |
|
"loss": 2.6125, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.06317149462804297, |
|
"eval_loss": 2.54296875, |
|
"eval_runtime": 2.9895, |
|
"eval_samples_per_second": 19.067, |
|
"eval_steps_per_second": 1.338, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6837107640945904e-05, |
|
"loss": 2.4583, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.06262749898000816, |
|
"eval_loss": 2.5390625, |
|
"eval_runtime": 3.2063, |
|
"eval_samples_per_second": 17.778, |
|
"eval_steps_per_second": 1.248, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.6035858660096975e-05, |
|
"loss": 2.5088, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.06287683031869079, |
|
"eval_loss": 2.533203125, |
|
"eval_runtime": 2.5988, |
|
"eval_samples_per_second": 21.933, |
|
"eval_steps_per_second": 1.539, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.5159223574386117e-05, |
|
"loss": 2.6033, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.06308082868670384, |
|
"eval_loss": 2.521484375, |
|
"eval_runtime": 2.595, |
|
"eval_samples_per_second": 21.965, |
|
"eval_steps_per_second": 1.541, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.4213190690345018e-05, |
|
"loss": 2.5037, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.0632848270547169, |
|
"eval_loss": 2.515625, |
|
"eval_runtime": 3.1996, |
|
"eval_samples_per_second": 17.815, |
|
"eval_steps_per_second": 1.25, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.320422237183641e-05, |
|
"loss": 2.6033, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.06339815948139081, |
|
"eval_loss": 2.5078125, |
|
"eval_runtime": 2.4053, |
|
"eval_samples_per_second": 23.698, |
|
"eval_steps_per_second": 1.663, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2139210895556104e-05, |
|
"loss": 2.6023, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.06339815948139081, |
|
"eval_loss": 2.501953125, |
|
"eval_runtime": 2.804, |
|
"eval_samples_per_second": 20.328, |
|
"eval_steps_per_second": 1.427, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.1025431369794546e-05, |
|
"loss": 2.5034, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.06333016002538647, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 2.1988, |
|
"eval_samples_per_second": 25.923, |
|
"eval_steps_per_second": 1.819, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9870492038070255e-05, |
|
"loss": 2.4353, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.0631488281427082, |
|
"eval_loss": 2.490234375, |
|
"eval_runtime": 2.5999, |
|
"eval_samples_per_second": 21.924, |
|
"eval_steps_per_second": 1.538, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.8682282307111988e-05, |
|
"loss": 2.6262, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.06333016002538647, |
|
"eval_loss": 2.486328125, |
|
"eval_runtime": 2.9949, |
|
"eval_samples_per_second": 19.032, |
|
"eval_steps_per_second": 1.336, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.746891885421101e-05, |
|
"loss": 2.5613, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.0631261616573734, |
|
"eval_loss": 2.484375, |
|
"eval_runtime": 3.2109, |
|
"eval_samples_per_second": 17.752, |
|
"eval_steps_per_second": 1.246, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.623869018208499e-05, |
|
"loss": 2.115, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.06317149462804297, |
|
"eval_loss": 2.48046875, |
|
"eval_runtime": 3.1997, |
|
"eval_samples_per_second": 17.814, |
|
"eval_steps_per_second": 1.25, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.0885, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.06351149190806474, |
|
"eval_loss": 2.48046875, |
|
"eval_runtime": 2.7957, |
|
"eval_samples_per_second": 20.388, |
|
"eval_steps_per_second": 1.431, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.9777, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.06351149190806474, |
|
"eval_loss": 2.48046875, |
|
"eval_runtime": 2.6375, |
|
"eval_samples_per_second": 21.612, |
|
"eval_steps_per_second": 1.517, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.3761309817915017e-05, |
|
"loss": 2.1053, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.06335282651072124, |
|
"eval_loss": 2.48046875, |
|
"eval_runtime": 2.3919, |
|
"eval_samples_per_second": 23.831, |
|
"eval_steps_per_second": 1.672, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.2531081145788989e-05, |
|
"loss": 1.9324, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.06364749082007344, |
|
"eval_loss": 2.482421875, |
|
"eval_runtime": 2.7945, |
|
"eval_samples_per_second": 20.397, |
|
"eval_steps_per_second": 1.431, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1317717692888014e-05, |
|
"loss": 1.9122, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.06369282379074301, |
|
"eval_loss": 2.48828125, |
|
"eval_runtime": 2.7869, |
|
"eval_samples_per_second": 20.453, |
|
"eval_steps_per_second": 1.435, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0129507961929749e-05, |
|
"loss": 2.1058, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.06389682215875607, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 2.4103, |
|
"eval_samples_per_second": 23.648, |
|
"eval_steps_per_second": 1.66, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.974568630205462e-06, |
|
"loss": 1.9803, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.06389682215875607, |
|
"eval_loss": 2.501953125, |
|
"eval_runtime": 2.7926, |
|
"eval_samples_per_second": 20.411, |
|
"eval_steps_per_second": 1.432, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7.860789104443897e-06, |
|
"loss": 1.6486, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.06378348973208214, |
|
"eval_loss": 2.509765625, |
|
"eval_runtime": 3.2131, |
|
"eval_samples_per_second": 17.74, |
|
"eval_steps_per_second": 1.245, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.795777628163599e-06, |
|
"loss": 1.8983, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.06357949136406908, |
|
"eval_loss": 2.515625, |
|
"eval_runtime": 3.1898, |
|
"eval_samples_per_second": 17.869, |
|
"eval_steps_per_second": 1.254, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5.786809309654983e-06, |
|
"loss": 1.8105, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.06335282651072124, |
|
"eval_loss": 2.521484375, |
|
"eval_runtime": 3.1961, |
|
"eval_samples_per_second": 17.834, |
|
"eval_steps_per_second": 1.252, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.840776425613887e-06, |
|
"loss": 1.9916, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.5234375, |
|
"eval_runtime": 2.7902, |
|
"eval_samples_per_second": 20.428, |
|
"eval_steps_per_second": 1.434, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.964141339903026e-06, |
|
"loss": 1.886, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.06346615893739517, |
|
"eval_loss": 2.525390625, |
|
"eval_runtime": 2.6003, |
|
"eval_samples_per_second": 21.921, |
|
"eval_steps_per_second": 1.538, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.162892359054098e-06, |
|
"loss": 1.8013, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.52734375, |
|
"eval_runtime": 2.3881, |
|
"eval_samples_per_second": 23.868, |
|
"eval_steps_per_second": 1.675, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.442502826062072e-06, |
|
"loss": 1.8435, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.525390625, |
|
"eval_runtime": 3.2047, |
|
"eval_samples_per_second": 17.786, |
|
"eval_steps_per_second": 1.248, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.8078937319026655e-06, |
|
"loss": 2.1229, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.06348882542272995, |
|
"eval_loss": 2.5234375, |
|
"eval_runtime": 2.4087, |
|
"eval_samples_per_second": 23.664, |
|
"eval_steps_per_second": 1.661, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.2634001001741375e-06, |
|
"loss": 1.8739, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.06357949136406908, |
|
"eval_loss": 2.5234375, |
|
"eval_runtime": 3.196, |
|
"eval_samples_per_second": 17.835, |
|
"eval_steps_per_second": 1.252, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.127413744904805e-07, |
|
"loss": 1.7528, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.06369282379074301, |
|
"eval_loss": 2.5234375, |
|
"eval_runtime": 2.1908, |
|
"eval_samples_per_second": 26.018, |
|
"eval_steps_per_second": 1.826, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.589960109100444e-07, |
|
"loss": 1.9462, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.06360215784940387, |
|
"eval_loss": 2.521484375, |
|
"eval_runtime": 2.6164, |
|
"eval_samples_per_second": 21.786, |
|
"eval_steps_per_second": 1.529, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 38, |
|
"total_flos": 2058056761344.0, |
|
"train_loss": 2.2499421772203947, |
|
"train_runtime": 531.0436, |
|
"train_samples_per_second": 1.141, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"max_steps": 38, |
|
"num_train_epochs": 2, |
|
"total_flos": 2058056761344.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|