{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.913978494623656, "eval_steps": 200, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.54, "learning_rate": 0.0002, "loss": 0.1582, "step": 200 }, { "epoch": 0.54, "eval_runtime": 113.8373, "eval_samples_per_second": 2.75, "eval_steps_per_second": 0.176, "step": 200 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 0.077, "step": 400 }, { "epoch": 1.08, "eval_runtime": 113.8308, "eval_samples_per_second": 2.75, "eval_steps_per_second": 0.176, "step": 400 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 0.0598, "step": 600 }, { "epoch": 1.61, "eval_runtime": 113.7433, "eval_samples_per_second": 2.752, "eval_steps_per_second": 0.176, "step": 600 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 0.0552, "step": 800 }, { "epoch": 2.15, "eval_runtime": 113.7453, "eval_samples_per_second": 2.752, "eval_steps_per_second": 0.176, "step": 800 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 0.0418, "step": 1000 }, { "epoch": 2.69, "eval_runtime": 113.7299, "eval_samples_per_second": 2.752, "eval_steps_per_second": 0.176, "step": 1000 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 0.0396, "step": 1200 }, { "epoch": 3.23, "eval_runtime": 113.7472, "eval_samples_per_second": 2.752, "eval_steps_per_second": 0.176, "step": 1200 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 0.0333, "step": 1400 }, { "epoch": 3.76, "eval_runtime": 113.6577, "eval_samples_per_second": 2.754, "eval_steps_per_second": 0.176, "step": 1400 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 0.0303, "step": 1600 }, { "epoch": 4.3, "eval_runtime": 113.7892, "eval_samples_per_second": 2.751, "eval_steps_per_second": 0.176, "step": 1600 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 0.028, "step": 1800 }, { "epoch": 4.84, "eval_runtime": 113.8547, "eval_samples_per_second": 2.749, "eval_steps_per_second": 0.176, "step": 1800 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 0.0255, "step": 2000 }, { "epoch": 5.38, "eval_runtime": 113.7853, "eval_samples_per_second": 2.751, "eval_steps_per_second": 0.176, "step": 2000 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 0.0259, "step": 2200 }, { "epoch": 5.91, "eval_runtime": 113.6789, "eval_samples_per_second": 2.753, "eval_steps_per_second": 0.176, "step": 2200 } ], "logging_steps": 200, "max_steps": 2232, "num_train_epochs": 6, "save_steps": 200, "total_flos": 1.048086317730816e+18, "trial_name": null, "trial_params": null }