{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.84, "eval_steps": 500, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 2.5e-05, "loss": 2.4616, "step": 1 }, { "epoch": 0.27, "learning_rate": 0.000125, "loss": 2.2926, "step": 5 }, { "epoch": 0.53, "learning_rate": 0.0001995184726672197, "loss": 1.0788, "step": 10 }, { "epoch": 0.8, "learning_rate": 0.00019415440651830208, "loss": 0.3285, "step": 15 }, { "epoch": 0.96, "eval_loss": 0.25046542286872864, "eval_runtime": 4.9708, "eval_samples_per_second": 20.118, "eval_steps_per_second": 2.615, "step": 18 }, { "epoch": 1.07, "learning_rate": 0.00018314696123025454, "loss": 0.2382, "step": 20 }, { "epoch": 1.33, "learning_rate": 0.00016715589548470185, "loss": 0.2235, "step": 25 }, { "epoch": 1.6, "learning_rate": 0.0001471396736825998, "loss": 0.2044, "step": 30 }, { "epoch": 1.87, "learning_rate": 0.0001242980179903264, "loss": 0.1944, "step": 35 }, { "epoch": 1.97, "eval_loss": 0.21891021728515625, "eval_runtime": 4.0841, "eval_samples_per_second": 24.485, "eval_steps_per_second": 3.183, "step": 37 }, { "epoch": 2.13, "learning_rate": 0.0001, "loss": 0.1881, "step": 40 }, { "epoch": 2.4, "learning_rate": 7.570198200967362e-05, "loss": 0.1768, "step": 45 }, { "epoch": 2.67, "learning_rate": 5.286032631740023e-05, "loss": 0.1628, "step": 50 }, { "epoch": 2.93, "learning_rate": 3.2844104515298155e-05, "loss": 0.1767, "step": 55 }, { "epoch": 2.99, "eval_loss": 0.2127072662115097, "eval_runtime": 4.0548, "eval_samples_per_second": 24.662, "eval_steps_per_second": 3.206, "step": 56 }, { "epoch": 3.2, "learning_rate": 1.6853038769745467e-05, "loss": 0.1663, "step": 60 }, { "epoch": 3.47, "learning_rate": 5.8455934816979305e-06, "loss": 0.1468, "step": 65 }, { "epoch": 3.73, "learning_rate": 4.815273327803182e-07, "loss": 0.1591, "step": 70 }, { "epoch": 3.84, "eval_loss": 0.21221186220645905, "eval_runtime": 4.0724, "eval_samples_per_second": 24.556, "eval_steps_per_second": 3.192, "step": 72 }, { "epoch": 3.84, "step": 72, "total_flos": 40669301473280.0, "train_loss": 0.4049788423710399, "train_runtime": 399.0811, "train_samples_per_second": 3.007, "train_steps_per_second": 0.18 } ], "logging_steps": 5, "max_steps": 72, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 40669301473280.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }