{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10537193119901905, "eval_steps": 500, "global_step": 7616, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 1.4067256450653076, "learning_rate": 9.930821699849192e-05, "loss": 2.2227, "step": 500 }, { "epoch": 0.01, "grad_norm": 2.9092910289764404, "learning_rate": 9.861643399698382e-05, "loss": 2.1156, "step": 1000 }, { "epoch": 0.02, "grad_norm": 1.1342206001281738, "learning_rate": 9.792465099547574e-05, "loss": 2.0774, "step": 1500 }, { "epoch": 0.03, "grad_norm": 3.928356409072876, "learning_rate": 9.723286799396766e-05, "loss": 2.0468, "step": 2000 }, { "epoch": 0.03, "grad_norm": 0.9985896944999695, "learning_rate": 9.654108499245957e-05, "loss": 2.0288, "step": 2500 }, { "epoch": 0.04, "grad_norm": 1.6732710599899292, "learning_rate": 9.584930199095148e-05, "loss": 1.9954, "step": 3000 }, { "epoch": 0.05, "grad_norm": 0.9810203313827515, "learning_rate": 9.515751898944339e-05, "loss": 1.9757, "step": 3500 }, { "epoch": 0.06, "grad_norm": 0.8682870864868164, "learning_rate": 9.446573598793531e-05, "loss": 1.9651, "step": 4000 }, { "epoch": 0.06, "grad_norm": 0.9405160546302795, "learning_rate": 9.377395298642722e-05, "loss": 1.9498, "step": 4500 }, { "epoch": 0.07, "grad_norm": 0.8670147061347961, "learning_rate": 9.308216998491913e-05, "loss": 1.935, "step": 5000 }, { "epoch": 0.08, "grad_norm": 0.9158061146736145, "learning_rate": 9.239038698341104e-05, "loss": 1.9252, "step": 5500 }, { "epoch": 0.08, "grad_norm": 0.8484827280044556, "learning_rate": 9.169860398190296e-05, "loss": 1.9001, "step": 6000 }, { "epoch": 0.09, "grad_norm": 0.8079173564910889, "learning_rate": 9.100682098039488e-05, "loss": 1.9174, "step": 6500 }, { "epoch": 0.1, "grad_norm": 0.8451229929924011, "learning_rate": 9.031503797888679e-05, "loss": 1.8939, "step": 7000 }, { "epoch": 0.1, "grad_norm": 0.7751355171203613, "learning_rate": 8.962325497737871e-05, "loss": 1.8915, "step": 7500 } ], "logging_steps": 500, "max_steps": 72277, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 16, "total_flos": 6.94759669352104e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }