{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 25, "global_step": 250, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 218.56756591796875, "learning_rate": 0.00020137375226675998, "loss": 116.2836, "step": 10 }, { "epoch": 0.08, "grad_norm": 63.336116790771484, "learning_rate": 0.000192983179255645, "loss": 19.7601, "step": 20 }, { "epoch": 0.1, "eval_loss": 2.302827835083008, "eval_meteor": 0.0, "eval_rouge": 0.0, "eval_runtime": 185.734, "eval_samples_per_second": 1.346, "eval_steps_per_second": 1.346, "step": 25 }, { "epoch": 0.12, "grad_norm": 70.50274658203125, "learning_rate": 0.00018459260624453, "loss": 9.1734, "step": 30 }, { "epoch": 0.16, "grad_norm": 6.8686909675598145, "learning_rate": 0.00017620203323341497, "loss": 2.7262, "step": 40 }, { "epoch": 0.2, "grad_norm": 3.4275879859924316, "learning_rate": 0.0001678114602223, "loss": 1.414, "step": 50 }, { "epoch": 0.2, "eval_loss": 0.28776201605796814, "eval_meteor": 0.0321522930829127, "eval_rouge": 0.04923081784861186, "eval_runtime": 190.019, "eval_samples_per_second": 1.316, "eval_steps_per_second": 1.316, "step": 50 }, { "epoch": 0.24, "grad_norm": 1.1697942018508911, "learning_rate": 0.000159420887211185, "loss": 0.9842, "step": 60 }, { "epoch": 0.28, "grad_norm": 1.0976201295852661, "learning_rate": 0.00015103031420006998, "loss": 0.8046, "step": 70 }, { "epoch": 0.3, "eval_loss": 0.15482668578624725, "eval_meteor": 0.07836448350060113, "eval_rouge": 0.11588703439134326, "eval_runtime": 185.0738, "eval_samples_per_second": 1.351, "eval_steps_per_second": 1.351, "step": 75 }, { "epoch": 0.32, "grad_norm": 2.179997682571411, "learning_rate": 0.000142639741188955, "loss": 0.6368, "step": 80 }, { "epoch": 0.36, "grad_norm": 0.6928311586380005, "learning_rate": 0.00013424916817784, "loss": 0.5702, "step": 90 }, { "epoch": 0.4, "grad_norm": 3.005065679550171, "learning_rate": 0.00012585859516672497, "loss": 1.5576, "step": 100 }, { "epoch": 0.4, "eval_loss": 0.1288101226091385, "eval_meteor": 0.09437373191900951, "eval_rouge": 0.13950906104693644, "eval_runtime": 185.6365, "eval_samples_per_second": 1.347, "eval_steps_per_second": 1.347, "step": 100 }, { "epoch": 0.44, "grad_norm": 161.4024658203125, "learning_rate": 0.00011746802215561, "loss": 0.5221, "step": 110 }, { "epoch": 0.48, "grad_norm": 3.9336469173431396, "learning_rate": 0.00010907744914449499, "loss": 0.5364, "step": 120 }, { "epoch": 0.5, "eval_loss": 0.12464781105518341, "eval_meteor": 0.10298030905449183, "eval_rouge": 0.15341155496038483, "eval_runtime": 185.1181, "eval_samples_per_second": 1.35, "eval_steps_per_second": 1.35, "step": 125 }, { "epoch": 0.52, "grad_norm": 1.0329785346984863, "learning_rate": 0.00010068687613337999, "loss": 0.489, "step": 130 }, { "epoch": 0.56, "grad_norm": 1525.2017822265625, "learning_rate": 9.2296303122265e-05, "loss": 0.5098, "step": 140 }, { "epoch": 0.6, "grad_norm": 0.729365885257721, "learning_rate": 8.390573011115e-05, "loss": 0.4573, "step": 150 }, { "epoch": 0.6, "eval_loss": 0.12196389585733414, "eval_meteor": 0.11105957979118833, "eval_rouge": 0.1671068291083625, "eval_runtime": 185.6124, "eval_samples_per_second": 1.347, "eval_steps_per_second": 1.347, "step": 150 }, { "epoch": 0.64, "grad_norm": 0.80484539270401, "learning_rate": 7.551515710003499e-05, "loss": 0.4867, "step": 160 }, { "epoch": 0.68, "grad_norm": 1.1568104028701782, "learning_rate": 6.712458408892e-05, "loss": 0.4605, "step": 170 }, { "epoch": 0.7, "eval_loss": 0.12032772600650787, "eval_meteor": 0.11023612064638272, "eval_rouge": 0.16584557386508242, "eval_runtime": 186.0494, "eval_samples_per_second": 1.344, "eval_steps_per_second": 1.344, "step": 175 }, { "epoch": 0.72, "grad_norm": 2.0484607219696045, "learning_rate": 5.8734011077805e-05, "loss": 0.484, "step": 180 }, { "epoch": 0.76, "grad_norm": 0.6715890169143677, "learning_rate": 5.0343438066689994e-05, "loss": 0.5185, "step": 190 }, { "epoch": 0.8, "grad_norm": 0.8641834855079651, "learning_rate": 4.1952865055575e-05, "loss": 0.4503, "step": 200 }, { "epoch": 0.8, "eval_loss": 0.11902841925621033, "eval_meteor": 0.1107724532213342, "eval_rouge": 0.16928825200269476, "eval_runtime": 185.5798, "eval_samples_per_second": 1.347, "eval_steps_per_second": 1.347, "step": 200 }, { "epoch": 0.84, "grad_norm": 0.8465960025787354, "learning_rate": 3.356229204446e-05, "loss": 0.4348, "step": 210 }, { "epoch": 0.88, "grad_norm": 0.7913973331451416, "learning_rate": 2.5171719033344997e-05, "loss": 0.4515, "step": 220 }, { "epoch": 0.9, "eval_loss": 0.11855262517929077, "eval_meteor": 0.11552610996579722, "eval_rouge": 0.17371654211387, "eval_runtime": 185.2214, "eval_samples_per_second": 1.35, "eval_steps_per_second": 1.35, "step": 225 }, { "epoch": 0.92, "grad_norm": 0.65450519323349, "learning_rate": 1.678114602223e-05, "loss": 0.4112, "step": 230 }, { "epoch": 0.96, "grad_norm": 5.642087459564209, "learning_rate": 8.390573011115e-06, "loss": 0.478, "step": 240 }, { "epoch": 1.0, "grad_norm": 0.7398505806922913, "learning_rate": 0.0, "loss": 0.4641, "step": 250 }, { "epoch": 1.0, "eval_loss": 0.11815133690834045, "eval_meteor": 0.11557259073835732, "eval_rouge": 0.17417265621215244, "eval_runtime": 185.0931, "eval_samples_per_second": 1.351, "eval_steps_per_second": 1.351, "step": 250 } ], "logging_steps": 10, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1027178496000000.0, "train_batch_size": 1, "trial_name": null, "trial_params": { "gradient_accumulation_steps": 3, "learning_rate": 0.00020976432527787498, "num_train_epochs": 1 } }