{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07401924500370097, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014803849000740192, "eval_loss": 10.37421703338623, "eval_runtime": 1.9912, "eval_samples_per_second": 143.126, "eval_steps_per_second": 71.814, "step": 1 }, { "epoch": 0.007401924500370096, "grad_norm": 0.02240617945790291, "learning_rate": 5e-05, "loss": 10.3744, "step": 5 }, { "epoch": 0.014803849000740192, "grad_norm": 0.027036666870117188, "learning_rate": 0.0001, "loss": 10.3739, "step": 10 }, { "epoch": 0.014803849000740192, "eval_loss": 10.374021530151367, "eval_runtime": 2.0314, "eval_samples_per_second": 140.297, "eval_steps_per_second": 70.395, "step": 10 }, { "epoch": 0.02220577350111029, "grad_norm": 0.024142151698470116, "learning_rate": 9.619397662556435e-05, "loss": 10.3732, "step": 15 }, { "epoch": 0.029607698001480384, "grad_norm": 0.020294418558478355, "learning_rate": 8.535533905932738e-05, "loss": 10.3749, "step": 20 }, { "epoch": 0.029607698001480384, "eval_loss": 10.373614311218262, "eval_runtime": 2.087, "eval_samples_per_second": 136.562, "eval_steps_per_second": 68.52, "step": 20 }, { "epoch": 0.037009622501850484, "grad_norm": 0.020632833242416382, "learning_rate": 6.91341716182545e-05, "loss": 10.3731, "step": 25 }, { "epoch": 0.04441154700222058, "grad_norm": 0.02504877932369709, "learning_rate": 5e-05, "loss": 10.3723, "step": 30 }, { "epoch": 0.04441154700222058, "eval_loss": 10.373309135437012, "eval_runtime": 2.147, "eval_samples_per_second": 132.74, "eval_steps_per_second": 66.603, "step": 30 }, { "epoch": 0.05181347150259067, "grad_norm": 0.022444354370236397, "learning_rate": 3.086582838174551e-05, "loss": 10.373, "step": 35 }, { "epoch": 0.05921539600296077, "grad_norm": 0.028981929644942284, "learning_rate": 1.4644660940672627e-05, "loss": 10.3734, "step": 40 }, { "epoch": 0.05921539600296077, "eval_loss": 10.373154640197754, "eval_runtime": 2.0802, "eval_samples_per_second": 137.004, "eval_steps_per_second": 68.743, "step": 40 }, { "epoch": 0.06661732050333087, "grad_norm": 0.022452278062701225, "learning_rate": 3.8060233744356633e-06, "loss": 10.3721, "step": 45 }, { "epoch": 0.07401924500370097, "grad_norm": 0.023130293935537338, "learning_rate": 0.0, "loss": 10.3744, "step": 50 }, { "epoch": 0.07401924500370097, "eval_loss": 10.373126983642578, "eval_runtime": 2.0562, "eval_samples_per_second": 138.606, "eval_steps_per_second": 69.546, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1293090816000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }