{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07017543859649122, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014035087719298245, "eval_loss": 11.166300773620605, "eval_runtime": 1.7265, "eval_samples_per_second": 173.758, "eval_steps_per_second": 86.879, "step": 1 }, { "epoch": 0.007017543859649123, "grad_norm": 4.016921043395996, "learning_rate": 5e-05, "loss": 44.6916, "step": 5 }, { "epoch": 0.014035087719298246, "grad_norm": 4.227939605712891, "learning_rate": 0.0001, "loss": 44.5734, "step": 10 }, { "epoch": 0.014035087719298246, "eval_loss": 11.1311674118042, "eval_runtime": 1.3958, "eval_samples_per_second": 214.926, "eval_steps_per_second": 107.463, "step": 10 }, { "epoch": 0.021052631578947368, "grad_norm": 4.580113410949707, "learning_rate": 9.619397662556435e-05, "loss": 44.4809, "step": 15 }, { "epoch": 0.028070175438596492, "grad_norm": 5.107155799865723, "learning_rate": 8.535533905932738e-05, "loss": 44.2651, "step": 20 }, { "epoch": 0.028070175438596492, "eval_loss": 11.03613567352295, "eval_runtime": 1.4299, "eval_samples_per_second": 209.812, "eval_steps_per_second": 104.906, "step": 20 }, { "epoch": 0.03508771929824561, "grad_norm": 5.350675106048584, "learning_rate": 6.91341716182545e-05, "loss": 44.0636, "step": 25 }, { "epoch": 0.042105263157894736, "grad_norm": 5.687300682067871, "learning_rate": 5e-05, "loss": 43.8965, "step": 30 }, { "epoch": 0.042105263157894736, "eval_loss": 10.94633960723877, "eval_runtime": 1.4322, "eval_samples_per_second": 209.472, "eval_steps_per_second": 104.736, "step": 30 }, { "epoch": 0.04912280701754386, "grad_norm": 5.789061069488525, "learning_rate": 3.086582838174551e-05, "loss": 43.7472, "step": 35 }, { "epoch": 0.056140350877192984, "grad_norm": 5.840606689453125, "learning_rate": 1.4644660940672627e-05, "loss": 43.6562, "step": 40 }, { "epoch": 0.056140350877192984, "eval_loss": 10.899812698364258, "eval_runtime": 1.4235, "eval_samples_per_second": 210.752, "eval_steps_per_second": 105.376, "step": 40 }, { "epoch": 0.06315789473684211, "grad_norm": 5.924816608428955, "learning_rate": 3.8060233744356633e-06, "loss": 43.5953, "step": 45 }, { "epoch": 0.07017543859649122, "grad_norm": 5.921303749084473, "learning_rate": 0.0, "loss": 43.5959, "step": 50 }, { "epoch": 0.07017543859649122, "eval_loss": 10.889740943908691, "eval_runtime": 1.4388, "eval_samples_per_second": 208.502, "eval_steps_per_second": 104.251, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 482240102400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }