{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5, "eval_steps": 20, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1e-05, "loss": 0.588, "step": 10 }, { "epoch": 0.2, "learning_rate": 2e-05, "loss": 0.5793, "step": 20 }, { "epoch": 0.2, "eval_loss": 0.5849277377128601, "eval_runtime": 13.5251, "eval_samples_per_second": 1.848, "eval_steps_per_second": 0.296, "step": 20 }, { "epoch": 0.3, "learning_rate": 3e-05, "loss": 0.5065, "step": 30 }, { "epoch": 0.4, "learning_rate": 4e-05, "loss": 0.5404, "step": 40 }, { "epoch": 0.4, "eval_loss": 0.5613898634910583, "eval_runtime": 13.3646, "eval_samples_per_second": 1.871, "eval_steps_per_second": 0.299, "step": 40 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 0.4523, "step": 50 }, { "epoch": 0.6, "learning_rate": 4.9986331433523156e-05, "loss": 0.5692, "step": 60 }, { "epoch": 0.6, "eval_loss": 0.5513718724250793, "eval_runtime": 13.3937, "eval_samples_per_second": 1.867, "eval_steps_per_second": 0.299, "step": 60 }, { "epoch": 0.7, "learning_rate": 4.994534068046937e-05, "loss": 0.5257, "step": 70 }, { "epoch": 0.8, "learning_rate": 4.9877072563625285e-05, "loss": 0.4937, "step": 80 }, { "epoch": 0.8, "eval_loss": 0.5424299836158752, "eval_runtime": 13.4443, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.298, "step": 80 }, { "epoch": 0.9, "learning_rate": 4.978160173317438e-05, "loss": 0.5617, "step": 90 }, { "epoch": 1.0, "learning_rate": 4.965903258506806e-05, "loss": 0.3592, "step": 100 }, { "epoch": 1.0, "eval_loss": 0.5260083675384521, "eval_runtime": 13.4207, "eval_samples_per_second": 1.863, "eval_steps_per_second": 0.298, "step": 100 }, { "epoch": 1.1, "learning_rate": 4.9509499146870236e-05, "loss": 0.4586, "step": 110 }, { "epoch": 1.2, "learning_rate": 4.933316493120015e-05, "loss": 0.3724, "step": 120 }, { "epoch": 1.2, "eval_loss": 0.5247484445571899, "eval_runtime": 13.4092, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 120 }, { "epoch": 1.3, "learning_rate": 4.913022275693372e-05, "loss": 0.3205, "step": 130 }, { "epoch": 1.4, "learning_rate": 4.8900894538358944e-05, "loss": 0.4962, "step": 140 }, { "epoch": 1.4, "eval_loss": 0.5248944163322449, "eval_runtime": 13.4511, "eval_samples_per_second": 1.859, "eval_steps_per_second": 0.297, "step": 140 }, { "epoch": 1.5, "learning_rate": 4.864543104251587e-05, "loss": 0.3324, "step": 150 }, { "epoch": 1.6, "learning_rate": 4.8364111614986527e-05, "loss": 0.4781, "step": 160 }, { "epoch": 1.6, "eval_loss": 0.5312905311584473, "eval_runtime": 13.4112, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 160 }, { "epoch": 1.7, "learning_rate": 4.805724387443462e-05, "loss": 0.3429, "step": 170 }, { "epoch": 1.8, "learning_rate": 4.7725163376229064e-05, "loss": 0.3504, "step": 180 }, { "epoch": 1.8, "eval_loss": 0.5292657017707825, "eval_runtime": 13.3837, "eval_samples_per_second": 1.868, "eval_steps_per_second": 0.299, "step": 180 }, { "epoch": 1.9, "learning_rate": 4.736823324551909e-05, "loss": 0.4843, "step": 190 }, { "epoch": 2.0, "learning_rate": 4.698684378016222e-05, "loss": 0.2449, "step": 200 }, { "epoch": 2.0, "eval_loss": 0.5281097292900085, "eval_runtime": 13.438, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.298, "step": 200 }, { "epoch": 2.1, "learning_rate": 4.6581412023939354e-05, "loss": 0.3535, "step": 210 }, { "epoch": 2.2, "learning_rate": 4.6152381310523387e-05, "loss": 0.2264, "step": 220 }, { "epoch": 2.2, "eval_loss": 0.5974144339561462, "eval_runtime": 13.4417, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.298, "step": 220 }, { "epoch": 2.3, "learning_rate": 4.5700220778700504e-05, "loss": 0.2303, "step": 230 }, { "epoch": 2.4, "learning_rate": 4.522542485937369e-05, "loss": 0.3395, "step": 240 }, { "epoch": 2.4, "eval_loss": 0.5777361989021301, "eval_runtime": 13.414, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 240 }, { "epoch": 2.5, "learning_rate": 4.4728512734909844e-05, "loss": 0.1651, "step": 250 } ], "logging_steps": 10, "max_steps": 1000, "num_train_epochs": 10, "save_steps": 50, "total_flos": 3.515781426562253e+16, "trial_name": null, "trial_params": null }