{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.920001274880082, "eval_steps": 50000, "global_step": 1250000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 4.9920355054102726e-05, "loss": 1.1412, "step": 50000 }, { "epoch": 0.8, "eval_loss": 1.011365294456482, "eval_runtime": 2006.5534, "eval_samples_per_second": 111.193, "eval_steps_per_second": 1.738, "step": 50000 }, { "epoch": 1.59, "learning_rate": 4.992035346050262e-05, "loss": 1.0123, "step": 100000 }, { "epoch": 1.59, "eval_loss": 0.9693423509597778, "eval_runtime": 1995.4615, "eval_samples_per_second": 111.811, "eval_steps_per_second": 1.747, "step": 100000 }, { "epoch": 2.39, "learning_rate": 4.992034708610221e-05, "loss": 0.9754, "step": 150000 }, { "epoch": 2.39, "eval_loss": 0.9472731351852417, "eval_runtime": 1997.2157, "eval_samples_per_second": 111.713, "eval_steps_per_second": 1.746, "step": 150000 }, { "epoch": 3.19, "learning_rate": 4.992034549250211e-05, "loss": 0.9539, "step": 200000 }, { "epoch": 3.19, "eval_loss": 0.9325647354125977, "eval_runtime": 2015.1208, "eval_samples_per_second": 110.72, "eval_steps_per_second": 1.73, "step": 200000 }, { "epoch": 3.98, "learning_rate": 4.992035027330242e-05, "loss": 0.9387, "step": 250000 }, { "epoch": 3.98, "eval_loss": 0.9212433099746704, "eval_runtime": 2011.4269, "eval_samples_per_second": 110.924, "eval_steps_per_second": 1.734, "step": 250000 }, { "epoch": 4.78, "learning_rate": 4.992035027330242e-05, "loss": 0.9243, "step": 300000 }, { "epoch": 4.78, "eval_loss": 0.9138051271438599, "eval_runtime": 2011.3973, "eval_samples_per_second": 110.925, "eval_steps_per_second": 1.734, "step": 300000 }, { "epoch": 5.58, "learning_rate": 4.992035186690252e-05, "loss": 0.9144, "step": 350000 }, { "epoch": 5.58, "eval_loss": 0.9093130826950073, "eval_runtime": 1998.4573, "eval_samples_per_second": 111.644, "eval_steps_per_second": 1.745, "step": 350000 }, { "epoch": 6.37, "learning_rate": 4.9920355054102726e-05, "loss": 0.906, "step": 400000 }, { "epoch": 6.37, "eval_loss": 0.9041373133659363, "eval_runtime": 1998.7351, "eval_samples_per_second": 111.628, "eval_steps_per_second": 1.745, "step": 400000 }, { "epoch": 7.17, "learning_rate": 4.9920355054102726e-05, "loss": 0.8994, "step": 450000 }, { "epoch": 7.17, "eval_loss": 0.9003444910049438, "eval_runtime": 1982.6092, "eval_samples_per_second": 112.536, "eval_steps_per_second": 1.759, "step": 450000 }, { "epoch": 7.97, "learning_rate": 4.992035186690252e-05, "loss": 0.8933, "step": 500000 }, { "epoch": 7.97, "eval_loss": 0.8956149220466614, "eval_runtime": 2002.7479, "eval_samples_per_second": 111.404, "eval_steps_per_second": 1.741, "step": 500000 }, { "epoch": 8.76, "learning_rate": 4.9920355054102726e-05, "loss": 0.8856, "step": 550000 }, { "epoch": 8.76, "eval_loss": 0.8930546045303345, "eval_runtime": 1996.5839, "eval_samples_per_second": 111.748, "eval_steps_per_second": 1.746, "step": 550000 }, { "epoch": 9.56, "learning_rate": 4.992035346050262e-05, "loss": 0.8802, "step": 600000 }, { "epoch": 9.56, "eval_loss": 0.89084392786026, "eval_runtime": 1991.4984, "eval_samples_per_second": 112.034, "eval_steps_per_second": 1.751, "step": 600000 }, { "epoch": 10.36, "learning_rate": 4.992035346050262e-05, "loss": 0.8763, "step": 650000 }, { "epoch": 10.36, "eval_loss": 0.8895950317382812, "eval_runtime": 2006.0064, "eval_samples_per_second": 111.223, "eval_steps_per_second": 1.738, "step": 650000 }, { "epoch": 11.16, "learning_rate": 4.992035186690252e-05, "loss": 0.8725, "step": 700000 }, { "epoch": 11.16, "eval_loss": 0.8886296153068542, "eval_runtime": 2027.9335, "eval_samples_per_second": 110.021, "eval_steps_per_second": 1.719, "step": 700000 }, { "epoch": 11.95, "learning_rate": 4.992035186690252e-05, "loss": 0.8688, "step": 750000 }, { "epoch": 11.95, "eval_loss": 0.885003924369812, "eval_runtime": 1989.8507, "eval_samples_per_second": 112.127, "eval_steps_per_second": 1.752, "step": 750000 }, { "epoch": 12.75, "learning_rate": 4.992035186690252e-05, "loss": 0.8628, "step": 800000 }, { "epoch": 12.75, "eval_loss": 0.8833887577056885, "eval_runtime": 2010.1701, "eval_samples_per_second": 110.993, "eval_steps_per_second": 1.735, "step": 800000 }, { "epoch": 13.55, "learning_rate": 4.9920348679702315e-05, "loss": 0.8599, "step": 850000 }, { "epoch": 13.55, "eval_loss": 0.883805513381958, "eval_runtime": 2005.9725, "eval_samples_per_second": 111.225, "eval_steps_per_second": 1.738, "step": 850000 }, { "epoch": 14.34, "learning_rate": 4.9920348679702315e-05, "loss": 0.8572, "step": 900000 }, { "epoch": 14.34, "eval_loss": 0.8837567567825317, "eval_runtime": 2005.535, "eval_samples_per_second": 111.25, "eval_steps_per_second": 1.739, "step": 900000 }, { "epoch": 15.14, "learning_rate": 4.9920348679702315e-05, "loss": 0.8548, "step": 950000 }, { "epoch": 15.14, "eval_loss": 0.8825677037239075, "eval_runtime": 1984.894, "eval_samples_per_second": 112.407, "eval_steps_per_second": 1.757, "step": 950000 }, { "epoch": 15.94, "learning_rate": 4.992034549250211e-05, "loss": 0.8502, "step": 1000000 }, { "epoch": 15.94, "eval_loss": 0.8808427453041077, "eval_runtime": 2006.4913, "eval_samples_per_second": 111.197, "eval_steps_per_second": 1.738, "step": 1000000 }, { "epoch": 16.73, "learning_rate": 4.992034708610221e-05, "loss": 0.8471, "step": 1050000 }, { "epoch": 16.73, "eval_loss": 0.8812766075134277, "eval_runtime": 1998.6292, "eval_samples_per_second": 111.634, "eval_steps_per_second": 1.745, "step": 1050000 }, { "epoch": 17.53, "learning_rate": 4.992034708610221e-05, "loss": 0.8427, "step": 1100000 }, { "epoch": 17.53, "eval_loss": 0.8817498683929443, "eval_runtime": 1994.6872, "eval_samples_per_second": 111.855, "eval_steps_per_second": 1.748, "step": 1100000 }, { "epoch": 18.33, "learning_rate": 4.992034549250211e-05, "loss": 0.841, "step": 1150000 }, { "epoch": 18.33, "eval_loss": 0.8802331686019897, "eval_runtime": 1993.2844, "eval_samples_per_second": 111.933, "eval_steps_per_second": 1.749, "step": 1150000 }, { "epoch": 19.12, "learning_rate": 4.992034549250211e-05, "loss": 0.8399, "step": 1200000 }, { "epoch": 19.12, "eval_loss": 0.8813353180885315, "eval_runtime": 2003.661, "eval_samples_per_second": 111.354, "eval_steps_per_second": 1.74, "step": 1200000 }, { "epoch": 19.92, "learning_rate": 4.9920348679702315e-05, "loss": 0.8382, "step": 1250000 }, { "epoch": 19.92, "eval_loss": 0.8779821991920471, "eval_runtime": 1999.9414, "eval_samples_per_second": 111.561, "eval_steps_per_second": 1.744, "step": 1250000 } ], "logging_steps": 50000, "max_steps": 31375500, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 50000, "total_flos": 2.61291411431424e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }