{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0006815122757398668, "eval_steps": 2, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.407561378699334e-05, "grad_norm": 2.638183116912842, "learning_rate": 1e-05, "loss": 44.3756, "step": 1 }, { "epoch": 3.407561378699334e-05, "eval_loss": 11.093536376953125, "eval_runtime": 113.9897, "eval_samples_per_second": 108.405, "eval_steps_per_second": 54.207, "step": 1 }, { "epoch": 6.815122757398668e-05, "grad_norm": 2.178905963897705, "learning_rate": 2e-05, "loss": 44.2993, "step": 2 }, { "epoch": 6.815122757398668e-05, "eval_loss": 11.093539237976074, "eval_runtime": 114.5009, "eval_samples_per_second": 107.921, "eval_steps_per_second": 53.965, "step": 2 }, { "epoch": 0.00010222684136098001, "grad_norm": 2.257598876953125, "learning_rate": 3e-05, "loss": 44.3175, "step": 3 }, { "epoch": 0.00013630245514797336, "grad_norm": 3.2154581546783447, "learning_rate": 4e-05, "loss": 44.4066, "step": 4 }, { "epoch": 0.00013630245514797336, "eval_loss": 11.092741012573242, "eval_runtime": 114.161, "eval_samples_per_second": 108.242, "eval_steps_per_second": 54.125, "step": 4 }, { "epoch": 0.0001703780689349667, "grad_norm": 2.7002158164978027, "learning_rate": 5e-05, "loss": 44.4089, "step": 5 }, { "epoch": 0.00020445368272196002, "grad_norm": 2.5694777965545654, "learning_rate": 6e-05, "loss": 44.3066, "step": 6 }, { "epoch": 0.00020445368272196002, "eval_loss": 11.091360092163086, "eval_runtime": 114.2137, "eval_samples_per_second": 108.192, "eval_steps_per_second": 54.1, "step": 6 }, { "epoch": 0.00023852929650895337, "grad_norm": 2.498781442642212, "learning_rate": 7e-05, "loss": 44.3137, "step": 7 }, { "epoch": 0.0002726049102959467, "grad_norm": 2.4782936573028564, "learning_rate": 8e-05, "loss": 44.3997, "step": 8 }, { "epoch": 0.0002726049102959467, "eval_loss": 11.08991527557373, "eval_runtime": 114.0478, "eval_samples_per_second": 108.349, "eval_steps_per_second": 54.179, "step": 8 }, { "epoch": 0.00030668052408294006, "grad_norm": 2.774634599685669, "learning_rate": 9e-05, "loss": 44.3353, "step": 9 }, { "epoch": 0.0003407561378699334, "grad_norm": 2.3270585536956787, "learning_rate": 0.0001, "loss": 44.4153, "step": 10 }, { "epoch": 0.0003407561378699334, "eval_loss": 11.087890625, "eval_runtime": 114.1863, "eval_samples_per_second": 108.218, "eval_steps_per_second": 54.113, "step": 10 }, { "epoch": 0.0003748317516569267, "grad_norm": 2.2423906326293945, "learning_rate": 9.755282581475769e-05, "loss": 44.3149, "step": 11 }, { "epoch": 0.00040890736544392005, "grad_norm": 2.1097352504730225, "learning_rate": 9.045084971874738e-05, "loss": 44.4146, "step": 12 }, { "epoch": 0.00040890736544392005, "eval_loss": 11.085128784179688, "eval_runtime": 114.1984, "eval_samples_per_second": 108.206, "eval_steps_per_second": 54.108, "step": 12 }, { "epoch": 0.0004429829792309134, "grad_norm": 2.6091725826263428, "learning_rate": 7.938926261462366e-05, "loss": 44.3838, "step": 13 }, { "epoch": 0.00047705859301790674, "grad_norm": 2.9170174598693848, "learning_rate": 6.545084971874738e-05, "loss": 44.4147, "step": 14 }, { "epoch": 0.00047705859301790674, "eval_loss": 11.082711219787598, "eval_runtime": 114.2172, "eval_samples_per_second": 108.189, "eval_steps_per_second": 54.099, "step": 14 }, { "epoch": 0.0005111342068049, "grad_norm": 2.4831223487854004, "learning_rate": 5e-05, "loss": 44.2988, "step": 15 }, { "epoch": 0.0005452098205918934, "grad_norm": 2.7560153007507324, "learning_rate": 3.4549150281252636e-05, "loss": 44.2967, "step": 16 }, { "epoch": 0.0005452098205918934, "eval_loss": 11.08093547821045, "eval_runtime": 114.1435, "eval_samples_per_second": 108.258, "eval_steps_per_second": 54.134, "step": 16 }, { "epoch": 0.0005792854343788867, "grad_norm": 2.718780040740967, "learning_rate": 2.061073738537635e-05, "loss": 44.2053, "step": 17 }, { "epoch": 0.0006133610481658801, "grad_norm": 2.361675262451172, "learning_rate": 9.549150281252633e-06, "loss": 44.2844, "step": 18 }, { "epoch": 0.0006133610481658801, "eval_loss": 11.079801559448242, "eval_runtime": 114.2329, "eval_samples_per_second": 108.174, "eval_steps_per_second": 54.091, "step": 18 }, { "epoch": 0.0006474366619528734, "grad_norm": 2.549006938934326, "learning_rate": 2.4471741852423237e-06, "loss": 44.2642, "step": 19 }, { "epoch": 0.0006815122757398668, "grad_norm": 2.272350549697876, "learning_rate": 0.0, "loss": 44.2192, "step": 20 }, { "epoch": 0.0006815122757398668, "eval_loss": 11.079532623291016, "eval_runtime": 114.2157, "eval_samples_per_second": 108.19, "eval_steps_per_second": 54.099, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 55215390720.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }