{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1261261261261262, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11261261261261261, "grad_norm": 0.618191659450531, "learning_rate": 0.00019510565162951537, "loss": 1.2486, "step": 25 }, { "epoch": 0.22522522522522523, "grad_norm": 0.8095678687095642, "learning_rate": 0.00018090169943749476, "loss": 1.2557, "step": 50 }, { "epoch": 0.33783783783783783, "grad_norm": 0.578891396522522, "learning_rate": 0.00015877852522924732, "loss": 1.257, "step": 75 }, { "epoch": 0.45045045045045046, "grad_norm": 0.6523413062095642, "learning_rate": 0.00013090169943749476, "loss": 1.2573, "step": 100 }, { "epoch": 0.5630630630630631, "grad_norm": 0.5457090735435486, "learning_rate": 0.0001, "loss": 1.2009, "step": 125 }, { "epoch": 0.6756756756756757, "grad_norm": 0.5979616045951843, "learning_rate": 6.909830056250527e-05, "loss": 1.2826, "step": 150 }, { "epoch": 0.7882882882882883, "grad_norm": 0.6098126173019409, "learning_rate": 4.12214747707527e-05, "loss": 1.3606, "step": 175 }, { "epoch": 0.9009009009009009, "grad_norm": 0.7838721871376038, "learning_rate": 1.9098300562505266e-05, "loss": 1.3275, "step": 200 }, { "epoch": 1.0135135135135136, "grad_norm": 0.5452545881271362, "learning_rate": 4.8943483704846475e-06, "loss": 1.31, "step": 225 }, { "epoch": 1.1261261261261262, "grad_norm": 0.5899876356124878, "learning_rate": 0.0, "loss": 1.2133, "step": 250 } ], "logging_steps": 25, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 814586836156416.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }