{ "best_metric": 0.022033799439668655, "best_model_checkpoint": "./results/checkpoint-6000", "epoch": 0.687915615684476, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 1.9617824657953072e-05, "loss": 0.058, "step": 500 }, { "epoch": 0.06, "eval_accuracy": 0.9880467809436451, "eval_loss": 0.04749465361237526, "eval_runtime": 369.9155, "eval_samples_per_second": 94.308, "eval_steps_per_second": 5.896, "step": 500 }, { "epoch": 0.11, "learning_rate": 1.923564931590614e-05, "loss": 0.0513, "step": 1000 }, { "epoch": 0.11, "eval_accuracy": 0.9892220374935504, "eval_loss": 0.044388506561517715, "eval_runtime": 313.5583, "eval_samples_per_second": 111.258, "eval_steps_per_second": 6.956, "step": 1000 }, { "epoch": 0.17, "learning_rate": 1.885347397385921e-05, "loss": 0.0413, "step": 1500 }, { "epoch": 0.17, "eval_accuracy": 0.9901393108983546, "eval_loss": 0.044446878135204315, "eval_runtime": 322.6458, "eval_samples_per_second": 108.125, "eval_steps_per_second": 6.76, "step": 1500 }, { "epoch": 0.23, "learning_rate": 1.847129863181228e-05, "loss": 0.0364, "step": 2000 }, { "epoch": 0.23, "eval_accuracy": 0.9911712434787594, "eval_loss": 0.037910301238298416, "eval_runtime": 258.562, "eval_samples_per_second": 134.923, "eval_steps_per_second": 8.435, "step": 2000 }, { "epoch": 0.29, "learning_rate": 1.8089123289765345e-05, "loss": 0.0389, "step": 2500 }, { "epoch": 0.29, "eval_accuracy": 0.9921171816774638, "eval_loss": 0.03007333353161812, "eval_runtime": 258.8834, "eval_samples_per_second": 134.756, "eval_steps_per_second": 8.425, "step": 2500 }, { "epoch": 0.34, "learning_rate": 1.7706947947718412e-05, "loss": 0.0343, "step": 3000 }, { "epoch": 0.34, "eval_accuracy": 0.9923465000286648, "eval_loss": 0.03364783525466919, "eval_runtime": 258.3829, "eval_samples_per_second": 135.017, "eval_steps_per_second": 8.441, "step": 3000 }, { "epoch": 0.4, "learning_rate": 1.7324772605671482e-05, "loss": 0.0351, "step": 3500 }, { "epoch": 0.4, "eval_accuracy": 0.9918591985323626, "eval_loss": 0.030066516250371933, "eval_runtime": 258.5585, "eval_samples_per_second": 134.925, "eval_steps_per_second": 8.435, "step": 3500 }, { "epoch": 0.46, "learning_rate": 1.6942597263624552e-05, "loss": 0.0343, "step": 4000 }, { "epoch": 0.46, "eval_accuracy": 0.9925471535859657, "eval_loss": 0.02801605314016342, "eval_runtime": 308.2791, "eval_samples_per_second": 113.164, "eval_steps_per_second": 7.075, "step": 4000 }, { "epoch": 0.52, "learning_rate": 1.6560421921577622e-05, "loss": 0.0328, "step": 4500 }, { "epoch": 0.52, "eval_accuracy": 0.9937224101358711, "eval_loss": 0.022993654012680054, "eval_runtime": 304.7606, "eval_samples_per_second": 114.47, "eval_steps_per_second": 7.156, "step": 4500 }, { "epoch": 0.57, "learning_rate": 1.617824657953069e-05, "loss": 0.0322, "step": 5000 }, { "epoch": 0.57, "eval_accuracy": 0.9930631198761681, "eval_loss": 0.022875914350152016, "eval_runtime": 509.8625, "eval_samples_per_second": 68.422, "eval_steps_per_second": 4.278, "step": 5000 }, { "epoch": 0.63, "learning_rate": 1.579607123748376e-05, "loss": 0.0275, "step": 5500 }, { "epoch": 0.63, "eval_accuracy": 0.994353035601674, "eval_loss": 0.024034755304455757, "eval_runtime": 296.8109, "eval_samples_per_second": 117.536, "eval_steps_per_second": 7.348, "step": 5500 }, { "epoch": 0.69, "learning_rate": 1.541389589543683e-05, "loss": 0.0288, "step": 6000 }, { "epoch": 0.69, "eval_accuracy": 0.9946396835406753, "eval_loss": 0.022033799439668655, "eval_runtime": 293.7762, "eval_samples_per_second": 118.75, "eval_steps_per_second": 7.424, "step": 6000 } ], "max_steps": 26166, "num_train_epochs": 3, "total_flos": 1.2716870270976e+16, "trial_name": null, "trial_params": null }