{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 3970, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.0222, "eval_gen_len": 19.0, "eval_loss": 1.032914400100708, "eval_runtime": 38.8818, "eval_samples_per_second": 20.395, "eval_steps_per_second": 1.286, "step": 397 }, { "epoch": 1.26, "learning_rate": 4.370277078085643e-05, "loss": 1.5198, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.0395, "eval_gen_len": 19.0, "eval_loss": 0.9034101963043213, "eval_runtime": 38.8135, "eval_samples_per_second": 20.431, "eval_steps_per_second": 1.288, "step": 794 }, { "epoch": 2.52, "learning_rate": 3.7405541561712845e-05, "loss": 1.0351, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.0686, "eval_gen_len": 19.0, "eval_loss": 0.8492476344108582, "eval_runtime": 38.98, "eval_samples_per_second": 20.344, "eval_steps_per_second": 1.283, "step": 1191 }, { "epoch": 3.78, "learning_rate": 3.1108312342569276e-05, "loss": 0.9326, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 0.0832, "eval_gen_len": 19.0, "eval_loss": 0.8076524138450623, "eval_runtime": 38.884, "eval_samples_per_second": 20.394, "eval_steps_per_second": 1.286, "step": 1588 }, { "epoch": 5.0, "eval_bleu": 0.0983, "eval_gen_len": 19.0, "eval_loss": 0.7785636782646179, "eval_runtime": 38.9036, "eval_samples_per_second": 20.384, "eval_steps_per_second": 1.285, "step": 1985 }, { "epoch": 5.04, "learning_rate": 2.4811083123425694e-05, "loss": 0.8785, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 0.0606, "eval_gen_len": 19.0, "eval_loss": 0.7609861493110657, "eval_runtime": 39.0721, "eval_samples_per_second": 20.296, "eval_steps_per_second": 1.28, "step": 2382 }, { "epoch": 6.3, "learning_rate": 1.8513853904282116e-05, "loss": 0.8436, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 0.065, "eval_gen_len": 19.0, "eval_loss": 0.7446194291114807, "eval_runtime": 39.2241, "eval_samples_per_second": 20.217, "eval_steps_per_second": 1.275, "step": 2779 }, { "epoch": 7.56, "learning_rate": 1.2216624685138539e-05, "loss": 0.8153, "step": 3000 }, { "epoch": 8.0, "eval_bleu": 0.0618, "eval_gen_len": 19.0, "eval_loss": 0.7361475825309753, "eval_runtime": 38.8407, "eval_samples_per_second": 20.417, "eval_steps_per_second": 1.287, "step": 3176 }, { "epoch": 8.82, "learning_rate": 5.919395465994963e-06, "loss": 0.7987, "step": 3500 }, { "epoch": 9.0, "eval_bleu": 0.068, "eval_gen_len": 19.0, "eval_loss": 0.7305701971054077, "eval_runtime": 38.7216, "eval_samples_per_second": 20.48, "eval_steps_per_second": 1.291, "step": 3573 }, { "epoch": 10.0, "eval_bleu": 0.0616, "eval_gen_len": 19.0, "eval_loss": 0.7289602160453796, "eval_runtime": 38.8065, "eval_samples_per_second": 20.435, "eval_steps_per_second": 1.288, "step": 3970 }, { "epoch": 10.0, "step": 3970, "total_flos": 2.255280810983424e+16, "train_loss": 0.9529764708703952, "train_runtime": 2099.0683, "train_samples_per_second": 30.213, "train_steps_per_second": 1.891 } ], "max_steps": 3970, "num_train_epochs": 10, "total_flos": 2.255280810983424e+16, "trial_name": null, "trial_params": null }