{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.918918918918919, "eval_steps": 500, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36036036036036034, "grad_norm": 24.162766925724526, "learning_rate": 5e-06, "loss": 1.2352, "step": 10 }, { "epoch": 0.7207207207207207, "grad_norm": 2.460851975181821, "learning_rate": 5e-06, "loss": 1.129, "step": 20 }, { "epoch": 0.972972972972973, "eval_loss": 1.0856622457504272, "eval_runtime": 20.0808, "eval_samples_per_second": 37.249, "eval_steps_per_second": 0.598, "step": 27 }, { "epoch": 1.0810810810810811, "grad_norm": 1.5697438122223701, "learning_rate": 5e-06, "loss": 1.1105, "step": 30 }, { "epoch": 1.4414414414414414, "grad_norm": 1.4204201478767204, "learning_rate": 5e-06, "loss": 1.0347, "step": 40 }, { "epoch": 1.8018018018018018, "grad_norm": 1.4140162476830664, "learning_rate": 5e-06, "loss": 1.015, "step": 50 }, { "epoch": 1.981981981981982, "eval_loss": 1.0293084383010864, "eval_runtime": 19.208, "eval_samples_per_second": 38.942, "eval_steps_per_second": 0.625, "step": 55 }, { "epoch": 2.1621621621621623, "grad_norm": 2.291804896225563, "learning_rate": 5e-06, "loss": 1.0103, "step": 60 }, { "epoch": 2.5225225225225225, "grad_norm": 1.4769259269396882, "learning_rate": 5e-06, "loss": 0.9502, "step": 70 }, { "epoch": 2.8828828828828827, "grad_norm": 1.4935265215970417, "learning_rate": 5e-06, "loss": 0.939, "step": 80 }, { "epoch": 2.918918918918919, "eval_loss": 1.0073624849319458, "eval_runtime": 17.8833, "eval_samples_per_second": 41.827, "eval_steps_per_second": 0.671, "step": 81 }, { "epoch": 2.918918918918919, "step": 81, "total_flos": 135468637224960.0, "train_loss": 1.051608243106324, "train_runtime": 2954.7854, "train_samples_per_second": 14.423, "train_steps_per_second": 0.027 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 135468637224960.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }