{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.4608, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 4.800000000000001e-06, "loss": 10.7159, "step": 97 }, { "epoch": 0.99, "eval_loss": 7.062511920928955, "eval_runtime": 16.5468, "eval_samples_per_second": 604.347, "eval_steps_per_second": 37.772, "step": 97 }, { "epoch": 1.99, "learning_rate": 9.65e-06, "loss": 5.0371, "step": 194 }, { "epoch": 1.99, "eval_loss": 2.8384995460510254, "eval_runtime": 16.0338, "eval_samples_per_second": 623.684, "eval_steps_per_second": 38.98, "step": 194 }, { "epoch": 2.99, "learning_rate": 1.4449999999999999e-05, "loss": 2.4865, "step": 291 }, { "epoch": 2.99, "eval_loss": 1.1602892875671387, "eval_runtime": 16.2241, "eval_samples_per_second": 616.367, "eval_steps_per_second": 38.523, "step": 291 }, { "epoch": 3.99, "learning_rate": 1.93e-05, "loss": 1.0806, "step": 388 }, { "epoch": 3.99, "eval_loss": 0.7614782452583313, "eval_runtime": 16.2665, "eval_samples_per_second": 614.761, "eval_steps_per_second": 38.423, "step": 388 }, { "epoch": 4.99, "learning_rate": 2.415e-05, "loss": 0.8412, "step": 485 }, { "epoch": 4.99, "eval_loss": 0.726601243019104, "eval_runtime": 16.7852, "eval_samples_per_second": 595.764, "eval_steps_per_second": 37.235, "step": 485 }, { "epoch": 5.99, "learning_rate": 2.9e-05, "loss": 0.802, "step": 582 }, { "epoch": 5.99, "eval_loss": 0.7105833292007446, "eval_runtime": 16.763, "eval_samples_per_second": 596.553, "eval_steps_per_second": 37.285, "step": 582 }, { "epoch": 6.99, "learning_rate": 3.385e-05, "loss": 0.7776, "step": 679 }, { "epoch": 6.99, "eval_loss": 0.6986653804779053, "eval_runtime": 16.7875, "eval_samples_per_second": 595.68, "eval_steps_per_second": 37.23, "step": 679 }, { "epoch": 7.99, "learning_rate": 3.8700000000000006e-05, "loss": 0.7568, "step": 776 }, { "epoch": 7.99, "eval_loss": 0.687833309173584, "eval_runtime": 16.4861, "eval_samples_per_second": 606.571, "eval_steps_per_second": 37.911, "step": 776 }, { "epoch": 8.99, "learning_rate": 4.355e-05, "loss": 0.7382, "step": 873 }, { "epoch": 8.99, "eval_loss": 0.6793721914291382, "eval_runtime": 16.6577, "eval_samples_per_second": 600.323, "eval_steps_per_second": 37.52, "step": 873 }, { "epoch": 9.99, "learning_rate": 4.8400000000000004e-05, "loss": 0.7202, "step": 970 }, { "epoch": 9.99, "eval_loss": 0.6695303320884705, "eval_runtime": 16.3181, "eval_samples_per_second": 612.817, "eval_steps_per_second": 38.301, "step": 970 }, { "epoch": 10.99, "learning_rate": 4.6542553191489364e-05, "loss": 0.7022, "step": 1067 }, { "epoch": 10.99, "eval_loss": 0.6605609059333801, "eval_runtime": 16.748, "eval_samples_per_second": 597.086, "eval_steps_per_second": 37.318, "step": 1067 }, { "epoch": 11.99, "learning_rate": 4.138297872340426e-05, "loss": 0.6844, "step": 1164 }, { "epoch": 11.99, "eval_loss": 0.6539720892906189, "eval_runtime": 16.7037, "eval_samples_per_second": 598.671, "eval_steps_per_second": 37.417, "step": 1164 }, { "epoch": 12.99, "learning_rate": 3.622340425531915e-05, "loss": 0.669, "step": 1261 }, { "epoch": 12.99, "eval_loss": 0.6471053957939148, "eval_runtime": 16.5172, "eval_samples_per_second": 605.43, "eval_steps_per_second": 37.839, "step": 1261 }, { "epoch": 13.99, "learning_rate": 3.1063829787234046e-05, "loss": 0.6562, "step": 1358 }, { "epoch": 13.99, "eval_loss": 0.6417160034179688, "eval_runtime": 16.0822, "eval_samples_per_second": 621.804, "eval_steps_per_second": 38.863, "step": 1358 }, { "epoch": 14.99, "learning_rate": 2.590425531914894e-05, "loss": 0.6453, "step": 1455 }, { "epoch": 14.99, "eval_loss": 0.6380994915962219, "eval_runtime": 16.4132, "eval_samples_per_second": 609.265, "eval_steps_per_second": 38.079, "step": 1455 } ], "max_steps": 1940, "num_train_epochs": 20, "total_flos": 5.89188339597312e+16, "trial_name": null, "trial_params": null }