{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.561600611942438, "eval_steps": 50000, "global_step": 600000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 4.9920355054102726e-05, "loss": 1.1412, "step": 50000 }, { "epoch": 0.8, "eval_loss": 1.011365294456482, "eval_runtime": 2006.5534, "eval_samples_per_second": 111.193, "eval_steps_per_second": 1.738, "step": 50000 }, { "epoch": 1.59, "learning_rate": 4.992035346050262e-05, "loss": 1.0123, "step": 100000 }, { "epoch": 1.59, "eval_loss": 0.9693423509597778, "eval_runtime": 1995.4615, "eval_samples_per_second": 111.811, "eval_steps_per_second": 1.747, "step": 100000 }, { "epoch": 2.39, "learning_rate": 4.992034708610221e-05, "loss": 0.9754, "step": 150000 }, { "epoch": 2.39, "eval_loss": 0.9472731351852417, "eval_runtime": 1997.2157, "eval_samples_per_second": 111.713, "eval_steps_per_second": 1.746, "step": 150000 }, { "epoch": 3.19, "learning_rate": 4.992034549250211e-05, "loss": 0.9539, "step": 200000 }, { "epoch": 3.19, "eval_loss": 0.9325647354125977, "eval_runtime": 2015.1208, "eval_samples_per_second": 110.72, "eval_steps_per_second": 1.73, "step": 200000 }, { "epoch": 3.98, "learning_rate": 4.992035027330242e-05, "loss": 0.9387, "step": 250000 }, { "epoch": 3.98, "eval_loss": 0.9212433099746704, "eval_runtime": 2011.4269, "eval_samples_per_second": 110.924, "eval_steps_per_second": 1.734, "step": 250000 }, { "epoch": 4.78, "learning_rate": 4.992035027330242e-05, "loss": 0.9243, "step": 300000 }, { "epoch": 4.78, "eval_loss": 0.9138051271438599, "eval_runtime": 2011.3973, "eval_samples_per_second": 110.925, "eval_steps_per_second": 1.734, "step": 300000 }, { "epoch": 5.58, "learning_rate": 4.992035186690252e-05, "loss": 0.9144, "step": 350000 }, { "epoch": 5.58, "eval_loss": 0.9093130826950073, "eval_runtime": 1998.4573, "eval_samples_per_second": 111.644, "eval_steps_per_second": 1.745, "step": 350000 }, { "epoch": 6.37, "learning_rate": 4.9920355054102726e-05, "loss": 0.906, "step": 400000 }, { "epoch": 6.37, "eval_loss": 0.9041373133659363, "eval_runtime": 1998.7351, "eval_samples_per_second": 111.628, "eval_steps_per_second": 1.745, "step": 400000 }, { "epoch": 7.17, "learning_rate": 4.9920355054102726e-05, "loss": 0.8994, "step": 450000 }, { "epoch": 7.17, "eval_loss": 0.9003444910049438, "eval_runtime": 1982.6092, "eval_samples_per_second": 112.536, "eval_steps_per_second": 1.759, "step": 450000 }, { "epoch": 7.97, "learning_rate": 4.992035186690252e-05, "loss": 0.8933, "step": 500000 }, { "epoch": 7.97, "eval_loss": 0.8956149220466614, "eval_runtime": 2002.7479, "eval_samples_per_second": 111.404, "eval_steps_per_second": 1.741, "step": 500000 }, { "epoch": 8.76, "learning_rate": 4.9920355054102726e-05, "loss": 0.8856, "step": 550000 }, { "epoch": 8.76, "eval_loss": 0.8930546045303345, "eval_runtime": 1996.5839, "eval_samples_per_second": 111.748, "eval_steps_per_second": 1.746, "step": 550000 }, { "epoch": 9.56, "learning_rate": 4.992035346050262e-05, "loss": 0.8802, "step": 600000 }, { "epoch": 9.56, "eval_loss": 0.89084392786026, "eval_runtime": 1991.4984, "eval_samples_per_second": 112.034, "eval_steps_per_second": 1.751, "step": 600000 } ], "logging_steps": 50000, "max_steps": 31375500, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 50000, "total_flos": 1.25419881406464e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }