{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "global_step": 658, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 6.666666666666667e-05, "loss": 3.4338, "step": 94 }, { "epoch": 1.0, "eval_loss": 2.4137182235717773, "eval_runtime": 36.2713, "eval_samples_per_second": 5.514, "eval_steps_per_second": 0.689, "step": 94 }, { "epoch": 2.0, "learning_rate": 3.3333333333333335e-05, "loss": 2.9565, "step": 188 }, { "epoch": 2.0, "eval_loss": 2.173758029937744, "eval_runtime": 35.902, "eval_samples_per_second": 5.571, "eval_steps_per_second": 0.696, "step": 188 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 2.7101, "step": 282 }, { "epoch": 3.0, "eval_loss": 2.012174606323242, "eval_runtime": 35.6264, "eval_samples_per_second": 5.614, "eval_steps_per_second": 0.702, "step": 282 }, { "epoch": 4.0, "learning_rate": 4.2857142857142856e-05, "loss": 2.7515, "step": 376 }, { "epoch": 4.0, "eval_loss": 1.964645504951477, "eval_runtime": 35.4193, "eval_samples_per_second": 5.647, "eval_steps_per_second": 0.706, "step": 376 }, { "epoch": 5.0, "learning_rate": 2.857142857142857e-05, "loss": 2.724, "step": 470 }, { "epoch": 5.0, "eval_loss": 2.1284220218658447, "eval_runtime": 34.8555, "eval_samples_per_second": 5.738, "eval_steps_per_second": 0.717, "step": 470 }, { "epoch": 6.0, "learning_rate": 1.4285714285714285e-05, "loss": 2.6193, "step": 564 }, { "epoch": 6.0, "eval_loss": 1.9379758834838867, "eval_runtime": 36.3088, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.689, "step": 564 }, { "epoch": 7.0, "learning_rate": 0.0, "loss": 2.5032, "step": 658 }, { "epoch": 7.0, "eval_loss": 1.9285995960235596, "eval_runtime": 35.9858, "eval_samples_per_second": 5.558, "eval_steps_per_second": 0.695, "step": 658 }, { "epoch": 7.0, "step": 658, "total_flos": 2.50893526464e+18, "train_loss": 1.5139910805189138, "train_runtime": 964.856, "train_samples_per_second": 5.441, "train_steps_per_second": 0.682 } ], "max_steps": 658, "num_train_epochs": 7, "total_flos": 2.50893526464e+18, "trial_name": null, "trial_params": null }