{ "best_metric": 1.2482833862304688, "best_model_checkpoint": "./outputs/checkpoint-3800", "epoch": 2.768670309653916, "eval_steps": 100, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.1911, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.0239477157592773, "eval_runtime": 143.6069, "eval_samples_per_second": 43.689, "eval_steps_per_second": 5.466, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.9736, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.93068528175354, "eval_runtime": 143.6288, "eval_samples_per_second": 43.682, "eval_steps_per_second": 5.465, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.9, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.8703595399856567, "eval_runtime": 143.4955, "eval_samples_per_second": 43.723, "eval_steps_per_second": 5.471, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8484, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.8250001668930054, "eval_runtime": 143.5707, "eval_samples_per_second": 43.7, "eval_steps_per_second": 5.468, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.7975, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.7833176851272583, "eval_runtime": 143.7076, "eval_samples_per_second": 43.658, "eval_steps_per_second": 5.462, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.769, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.755702018737793, "eval_runtime": 143.6171, "eval_samples_per_second": 43.686, "eval_steps_per_second": 5.466, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7302, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7235804796218872, "eval_runtime": 143.5461, "eval_samples_per_second": 43.707, "eval_steps_per_second": 5.469, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.709, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.694319248199463, "eval_runtime": 143.4905, "eval_samples_per_second": 43.724, "eval_steps_per_second": 5.471, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.6738, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6700080633163452, "eval_runtime": 143.5611, "eval_samples_per_second": 43.703, "eval_steps_per_second": 5.468, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6587, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6456865072250366, "eval_runtime": 143.5526, "eval_samples_per_second": 43.705, "eval_steps_per_second": 5.468, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6381, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.6236809492111206, "eval_runtime": 143.4948, "eval_samples_per_second": 43.723, "eval_steps_per_second": 5.471, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.6032, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.6063587665557861, "eval_runtime": 143.5371, "eval_samples_per_second": 43.71, "eval_steps_per_second": 5.469, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.6099, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.5829013586044312, "eval_runtime": 143.5391, "eval_samples_per_second": 43.709, "eval_steps_per_second": 5.469, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5636, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.5621864795684814, "eval_runtime": 143.917, "eval_samples_per_second": 43.595, "eval_steps_per_second": 5.455, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.5352, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.5466724634170532, "eval_runtime": 143.4292, "eval_samples_per_second": 43.743, "eval_steps_per_second": 5.473, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.5165, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.5282047986984253, "eval_runtime": 152.278, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.155, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5146, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.5113346576690674, "eval_runtime": 143.5758, "eval_samples_per_second": 43.698, "eval_steps_per_second": 5.467, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.4882, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.4970587491989136, "eval_runtime": 143.4845, "eval_samples_per_second": 43.726, "eval_steps_per_second": 5.471, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4774, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.48170804977417, "eval_runtime": 143.5274, "eval_samples_per_second": 43.713, "eval_steps_per_second": 5.469, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4619, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.4683845043182373, "eval_runtime": 143.5254, "eval_samples_per_second": 43.714, "eval_steps_per_second": 5.469, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4467, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.4535853862762451, "eval_runtime": 143.4994, "eval_samples_per_second": 43.721, "eval_steps_per_second": 5.47, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4209, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.438491940498352, "eval_runtime": 143.5083, "eval_samples_per_second": 43.719, "eval_steps_per_second": 5.47, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4197, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4254748821258545, "eval_runtime": 143.6399, "eval_samples_per_second": 43.679, "eval_steps_per_second": 5.465, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4198, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.4130115509033203, "eval_runtime": 143.5484, "eval_samples_per_second": 43.707, "eval_steps_per_second": 5.469, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3909, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.4006593227386475, "eval_runtime": 143.5037, "eval_samples_per_second": 43.72, "eval_steps_per_second": 5.47, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3813, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.3870867490768433, "eval_runtime": 143.5677, "eval_samples_per_second": 43.701, "eval_steps_per_second": 5.468, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3693, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.3732330799102783, "eval_runtime": 143.6296, "eval_samples_per_second": 43.682, "eval_steps_per_second": 5.465, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.3409, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.3628050088882446, "eval_runtime": 143.5437, "eval_samples_per_second": 43.708, "eval_steps_per_second": 5.469, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.3156, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.3481007814407349, "eval_runtime": 143.5887, "eval_samples_per_second": 43.694, "eval_steps_per_second": 5.467, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.316, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.3366949558258057, "eval_runtime": 143.6048, "eval_samples_per_second": 43.689, "eval_steps_per_second": 5.466, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.2964, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.3265715837478638, "eval_runtime": 143.6034, "eval_samples_per_second": 43.69, "eval_steps_per_second": 5.466, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.3005, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.3147220611572266, "eval_runtime": 143.6246, "eval_samples_per_second": 43.683, "eval_steps_per_second": 5.466, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.2811, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.3015214204788208, "eval_runtime": 143.5355, "eval_samples_per_second": 43.71, "eval_steps_per_second": 5.469, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.2626, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.2932065725326538, "eval_runtime": 143.6069, "eval_samples_per_second": 43.689, "eval_steps_per_second": 5.466, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.2635, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.28367018699646, "eval_runtime": 143.5245, "eval_samples_per_second": 43.714, "eval_steps_per_second": 5.469, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.2518, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.2685626745224, "eval_runtime": 143.6487, "eval_samples_per_second": 43.676, "eval_steps_per_second": 5.465, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.2361, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.257932186126709, "eval_runtime": 143.6501, "eval_samples_per_second": 43.676, "eval_steps_per_second": 5.465, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.2134, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.2482833862304688, "eval_runtime": 143.6429, "eval_samples_per_second": 43.678, "eval_steps_per_second": 5.465, "step": 3800 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.2648457919379046e+17, "trial_name": null, "trial_params": null }