{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 38, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 3e-05, "loss": 2.6941, "step": 1 }, { "epoch": 0.05, "eval_accuracy": 0.062219502243982046, "eval_loss": 2.654296875, "eval_runtime": 3.4086, "eval_samples_per_second": 16.722, "eval_steps_per_second": 1.173, "step": 1 }, { "epoch": 0.11, "learning_rate": 3e-05, "loss": 2.6914, "step": 2 }, { "epoch": 0.11, "eval_accuracy": 0.062219502243982046, "eval_loss": 2.654296875, "eval_runtime": 2.4357, "eval_samples_per_second": 23.402, "eval_steps_per_second": 1.642, "step": 2 }, { "epoch": 0.16, "learning_rate": 2.994876739510005e-05, "loss": 2.6003, "step": 3 }, { "epoch": 0.16, "eval_accuracy": 0.06265016546534294, "eval_loss": 2.6015625, "eval_runtime": 2.3986, "eval_samples_per_second": 23.763, "eval_steps_per_second": 1.668, "step": 3 }, { "epoch": 0.21, "learning_rate": 2.9795419551040836e-05, "loss": 2.5603, "step": 4 }, { "epoch": 0.21, "eval_accuracy": 0.0627181649213473, "eval_loss": 2.5703125, "eval_runtime": 2.5872, "eval_samples_per_second": 22.032, "eval_steps_per_second": 1.546, "step": 4 }, { "epoch": 0.26, "learning_rate": 2.9541003989089956e-05, "loss": 2.6067, "step": 5 }, { "epoch": 0.26, "eval_accuracy": 0.06289949680402557, "eval_loss": 2.55078125, "eval_runtime": 2.7942, "eval_samples_per_second": 20.4, "eval_steps_per_second": 1.432, "step": 5 }, { "epoch": 0.32, "learning_rate": 2.9187258625509518e-05, "loss": 2.5439, "step": 6 }, { "epoch": 0.32, "eval_accuracy": 0.0626954984360125, "eval_loss": 2.546875, "eval_runtime": 2.8026, "eval_samples_per_second": 20.338, "eval_steps_per_second": 1.427, "step": 6 }, { "epoch": 0.37, "learning_rate": 2.873659989982586e-05, "loss": 2.4459, "step": 7 }, { "epoch": 0.37, "eval_accuracy": 0.06289949680402557, "eval_loss": 2.548828125, "eval_runtime": 2.3868, "eval_samples_per_second": 23.881, "eval_steps_per_second": 1.676, "step": 7 }, { "epoch": 0.42, "learning_rate": 2.8192106268097336e-05, "loss": 2.5439, "step": 8 }, { "epoch": 0.42, "eval_accuracy": 0.06278616437735164, "eval_loss": 2.548828125, "eval_runtime": 3.0044, "eval_samples_per_second": 18.972, "eval_steps_per_second": 1.331, "step": 8 }, { "epoch": 0.47, "learning_rate": 2.7557497173937928e-05, "loss": 2.6125, "step": 9 }, { "epoch": 0.47, "eval_accuracy": 0.06317149462804297, "eval_loss": 2.54296875, "eval_runtime": 2.9895, "eval_samples_per_second": 19.067, "eval_steps_per_second": 1.338, "step": 9 }, { "epoch": 0.53, "learning_rate": 2.6837107640945904e-05, "loss": 2.4583, "step": 10 }, { "epoch": 0.53, "eval_accuracy": 0.06262749898000816, "eval_loss": 2.5390625, "eval_runtime": 3.2063, "eval_samples_per_second": 17.778, "eval_steps_per_second": 1.248, "step": 10 }, { "epoch": 0.58, "learning_rate": 2.6035858660096975e-05, "loss": 2.5088, "step": 11 }, { "epoch": 0.58, "eval_accuracy": 0.06287683031869079, "eval_loss": 2.533203125, "eval_runtime": 2.5988, "eval_samples_per_second": 21.933, "eval_steps_per_second": 1.539, "step": 11 }, { "epoch": 0.63, "learning_rate": 2.5159223574386117e-05, "loss": 2.6033, "step": 12 }, { "epoch": 0.63, "eval_accuracy": 0.06308082868670384, "eval_loss": 2.521484375, "eval_runtime": 2.595, "eval_samples_per_second": 21.965, "eval_steps_per_second": 1.541, "step": 12 }, { "epoch": 0.68, "learning_rate": 2.4213190690345018e-05, "loss": 2.5037, "step": 13 }, { "epoch": 0.68, "eval_accuracy": 0.0632848270547169, "eval_loss": 2.515625, "eval_runtime": 3.1996, "eval_samples_per_second": 17.815, "eval_steps_per_second": 1.25, "step": 13 }, { "epoch": 0.74, "learning_rate": 2.320422237183641e-05, "loss": 2.6033, "step": 14 }, { "epoch": 0.74, "eval_accuracy": 0.06339815948139081, "eval_loss": 2.5078125, "eval_runtime": 2.4053, "eval_samples_per_second": 23.698, "eval_steps_per_second": 1.663, "step": 14 }, { "epoch": 0.79, "learning_rate": 2.2139210895556104e-05, "loss": 2.6023, "step": 15 }, { "epoch": 0.79, "eval_accuracy": 0.06339815948139081, "eval_loss": 2.501953125, "eval_runtime": 2.804, "eval_samples_per_second": 20.328, "eval_steps_per_second": 1.427, "step": 15 }, { "epoch": 0.84, "learning_rate": 2.1025431369794546e-05, "loss": 2.5034, "step": 16 }, { "epoch": 0.84, "eval_accuracy": 0.06333016002538647, "eval_loss": 2.49609375, "eval_runtime": 2.1988, "eval_samples_per_second": 25.923, "eval_steps_per_second": 1.819, "step": 16 }, { "epoch": 0.89, "learning_rate": 1.9870492038070255e-05, "loss": 2.4353, "step": 17 }, { "epoch": 0.89, "eval_accuracy": 0.0631488281427082, "eval_loss": 2.490234375, "eval_runtime": 2.5999, "eval_samples_per_second": 21.924, "eval_steps_per_second": 1.538, "step": 17 }, { "epoch": 0.95, "learning_rate": 1.8682282307111988e-05, "loss": 2.6262, "step": 18 }, { "epoch": 0.95, "eval_accuracy": 0.06333016002538647, "eval_loss": 2.486328125, "eval_runtime": 2.9949, "eval_samples_per_second": 19.032, "eval_steps_per_second": 1.336, "step": 18 }, { "epoch": 1.0, "learning_rate": 1.746891885421101e-05, "loss": 2.5613, "step": 19 }, { "epoch": 1.0, "eval_accuracy": 0.0631261616573734, "eval_loss": 2.484375, "eval_runtime": 3.2109, "eval_samples_per_second": 17.752, "eval_steps_per_second": 1.246, "step": 19 }, { "epoch": 1.05, "learning_rate": 1.623869018208499e-05, "loss": 2.115, "step": 20 }, { "epoch": 1.05, "eval_accuracy": 0.06317149462804297, "eval_loss": 2.48046875, "eval_runtime": 3.1997, "eval_samples_per_second": 17.814, "eval_steps_per_second": 1.25, "step": 20 }, { "epoch": 1.11, "learning_rate": 1.5e-05, "loss": 2.0885, "step": 21 }, { "epoch": 1.11, "eval_accuracy": 0.06351149190806474, "eval_loss": 2.48046875, "eval_runtime": 2.7957, "eval_samples_per_second": 20.388, "eval_steps_per_second": 1.431, "step": 21 }, { "epoch": 1.16, "learning_rate": 1.5e-05, "loss": 1.9777, "step": 22 }, { "epoch": 1.16, "eval_accuracy": 0.06351149190806474, "eval_loss": 2.48046875, "eval_runtime": 2.6375, "eval_samples_per_second": 21.612, "eval_steps_per_second": 1.517, "step": 22 }, { "epoch": 1.21, "learning_rate": 1.3761309817915017e-05, "loss": 2.1053, "step": 23 }, { "epoch": 1.21, "eval_accuracy": 0.06335282651072124, "eval_loss": 2.48046875, "eval_runtime": 2.3919, "eval_samples_per_second": 23.831, "eval_steps_per_second": 1.672, "step": 23 }, { "epoch": 1.26, "learning_rate": 1.2531081145788989e-05, "loss": 1.9324, "step": 24 }, { "epoch": 1.26, "eval_accuracy": 0.06364749082007344, "eval_loss": 2.482421875, "eval_runtime": 2.7945, "eval_samples_per_second": 20.397, "eval_steps_per_second": 1.431, "step": 24 }, { "epoch": 1.32, "learning_rate": 1.1317717692888014e-05, "loss": 1.9122, "step": 25 }, { "epoch": 1.32, "eval_accuracy": 0.06369282379074301, "eval_loss": 2.48828125, "eval_runtime": 2.7869, "eval_samples_per_second": 20.453, "eval_steps_per_second": 1.435, "step": 25 }, { "epoch": 1.37, "learning_rate": 1.0129507961929749e-05, "loss": 2.1058, "step": 26 }, { "epoch": 1.37, "eval_accuracy": 0.06389682215875607, "eval_loss": 2.49609375, "eval_runtime": 2.4103, "eval_samples_per_second": 23.648, "eval_steps_per_second": 1.66, "step": 26 }, { "epoch": 1.42, "learning_rate": 8.974568630205462e-06, "loss": 1.9803, "step": 27 }, { "epoch": 1.42, "eval_accuracy": 0.06389682215875607, "eval_loss": 2.501953125, "eval_runtime": 2.7926, "eval_samples_per_second": 20.411, "eval_steps_per_second": 1.432, "step": 27 }, { "epoch": 1.47, "learning_rate": 7.860789104443897e-06, "loss": 1.6486, "step": 28 }, { "epoch": 1.47, "eval_accuracy": 0.06378348973208214, "eval_loss": 2.509765625, "eval_runtime": 3.2131, "eval_samples_per_second": 17.74, "eval_steps_per_second": 1.245, "step": 28 }, { "epoch": 1.53, "learning_rate": 6.795777628163599e-06, "loss": 1.8983, "step": 29 }, { "epoch": 1.53, "eval_accuracy": 0.06357949136406908, "eval_loss": 2.515625, "eval_runtime": 3.1898, "eval_samples_per_second": 17.869, "eval_steps_per_second": 1.254, "step": 29 }, { "epoch": 1.58, "learning_rate": 5.786809309654983e-06, "loss": 1.8105, "step": 30 }, { "epoch": 1.58, "eval_accuracy": 0.06335282651072124, "eval_loss": 2.521484375, "eval_runtime": 3.1961, "eval_samples_per_second": 17.834, "eval_steps_per_second": 1.252, "step": 30 }, { "epoch": 1.63, "learning_rate": 4.840776425613887e-06, "loss": 1.9916, "step": 31 }, { "epoch": 1.63, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.5234375, "eval_runtime": 2.7902, "eval_samples_per_second": 20.428, "eval_steps_per_second": 1.434, "step": 31 }, { "epoch": 1.68, "learning_rate": 3.964141339903026e-06, "loss": 1.886, "step": 32 }, { "epoch": 1.68, "eval_accuracy": 0.06346615893739517, "eval_loss": 2.525390625, "eval_runtime": 2.6003, "eval_samples_per_second": 21.921, "eval_steps_per_second": 1.538, "step": 32 }, { "epoch": 1.74, "learning_rate": 3.162892359054098e-06, "loss": 1.8013, "step": 33 }, { "epoch": 1.74, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.52734375, "eval_runtime": 2.3881, "eval_samples_per_second": 23.868, "eval_steps_per_second": 1.675, "step": 33 }, { "epoch": 1.79, "learning_rate": 2.442502826062072e-06, "loss": 1.8435, "step": 34 }, { "epoch": 1.79, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.525390625, "eval_runtime": 3.2047, "eval_samples_per_second": 17.786, "eval_steps_per_second": 1.248, "step": 34 }, { "epoch": 1.84, "learning_rate": 1.8078937319026655e-06, "loss": 2.1229, "step": 35 }, { "epoch": 1.84, "eval_accuracy": 0.06348882542272995, "eval_loss": 2.5234375, "eval_runtime": 2.4087, "eval_samples_per_second": 23.664, "eval_steps_per_second": 1.661, "step": 35 }, { "epoch": 1.89, "learning_rate": 1.2634001001741375e-06, "loss": 1.8739, "step": 36 }, { "epoch": 1.89, "eval_accuracy": 0.06357949136406908, "eval_loss": 2.5234375, "eval_runtime": 3.196, "eval_samples_per_second": 17.835, "eval_steps_per_second": 1.252, "step": 36 }, { "epoch": 1.95, "learning_rate": 8.127413744904805e-07, "loss": 1.7528, "step": 37 }, { "epoch": 1.95, "eval_accuracy": 0.06369282379074301, "eval_loss": 2.5234375, "eval_runtime": 2.1908, "eval_samples_per_second": 26.018, "eval_steps_per_second": 1.826, "step": 37 }, { "epoch": 2.0, "learning_rate": 4.589960109100444e-07, "loss": 1.9462, "step": 38 }, { "epoch": 2.0, "eval_accuracy": 0.06360215784940387, "eval_loss": 2.521484375, "eval_runtime": 2.6164, "eval_samples_per_second": 21.786, "eval_steps_per_second": 1.529, "step": 38 }, { "epoch": 2.0, "step": 38, "total_flos": 2058056761344.0, "train_loss": 2.2499421772203947, "train_runtime": 531.0436, "train_samples_per_second": 1.141, "train_steps_per_second": 0.072 } ], "max_steps": 38, "num_train_epochs": 2, "total_flos": 2058056761344.0, "trial_name": null, "trial_params": null }