{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 4578, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.764525993883792, "grad_norm": 2.860719919204712, "learning_rate": 1.781564001747488e-05, "loss": 0.2481, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.9536874012447196, "eval_f1": 0.8924138535986316, "eval_loss": 0.13802066445350647, "eval_precision": 0.8979934094386254, "eval_recall": 0.8869032051840874, "eval_runtime": 25.0596, "eval_samples_per_second": 212.693, "eval_steps_per_second": 26.617, "step": 654 }, { "epoch": 1.529051987767584, "grad_norm": 2.250061273574829, "learning_rate": 1.563128003494976e-05, "loss": 0.0943, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.9569120634613524, "eval_f1": 0.9029714891848114, "eval_loss": 0.1516450196504593, "eval_precision": 0.891004243281471, "eval_recall": 0.9152645802458373, "eval_runtime": 25.372, "eval_samples_per_second": 210.074, "eval_steps_per_second": 26.289, "step": 1308 }, { "epoch": 2.293577981651376, "grad_norm": 0.9499523043632507, "learning_rate": 1.344692005242464e-05, "loss": 0.0595, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.9584212053787365, "eval_f1": 0.9057778035083657, "eval_loss": 0.17137625813484192, "eval_precision": 0.9023039880042677, "eval_recall": 0.909278470345509, "eval_runtime": 24.0746, "eval_samples_per_second": 221.395, "eval_steps_per_second": 27.706, "step": 1962 }, { "epoch": 3.058103975535168, "grad_norm": 0.29300621151924133, "learning_rate": 1.126256006989952e-05, "loss": 0.0405, "step": 2000 }, { "epoch": 3.8226299694189603, "grad_norm": 0.09997886419296265, "learning_rate": 9.0782000873744e-06, "loss": 0.0218, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.9578988100996421, "eval_f1": 0.9073512721421696, "eval_loss": 0.20640073716640472, "eval_precision": 0.8972836278911178, "eval_recall": 0.9176474006915991, "eval_runtime": 23.267, "eval_samples_per_second": 229.08, "eval_steps_per_second": 28.667, "step": 2616 }, { "epoch": 4.587155963302752, "grad_norm": 1.0946885347366333, "learning_rate": 6.89384010484928e-06, "loss": 0.0121, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.9587372222759666, "eval_f1": 0.9086317256194029, "eval_loss": 0.22833754122257233, "eval_precision": 0.9076963229323745, "eval_recall": 0.9095690582047482, "eval_runtime": 23.0423, "eval_samples_per_second": 231.314, "eval_steps_per_second": 28.947, "step": 3270 }, { "epoch": 5.351681957186544, "grad_norm": 0.06167488545179367, "learning_rate": 4.70948012232416e-06, "loss": 0.0106, "step": 3500 }, { "epoch": 6.0, "eval_accuracy": 0.9577440263132437, "eval_f1": 0.9077642656688494, "eval_loss": 0.24395699799060822, "eval_precision": 0.8992358576642335, "eval_recall": 0.9164559904687182, "eval_runtime": 29.2986, "eval_samples_per_second": 181.92, "eval_steps_per_second": 22.766, "step": 3924 }, { "epoch": 6.116207951070336, "grad_norm": 0.0039491127245128155, "learning_rate": 2.5251201397990393e-06, "loss": 0.0067, "step": 4000 }, { "epoch": 6.8807339449541285, "grad_norm": 0.015259736217558384, "learning_rate": 3.407601572739188e-07, "loss": 0.005, "step": 4500 }, { "epoch": 7.0, "eval_accuracy": 0.9576085905001451, "eval_f1": 0.9074234296591107, "eval_loss": 0.2515053153038025, "eval_precision": 0.9006440532417347, "eval_recall": 0.9143056403103479, "eval_runtime": 24.477, "eval_samples_per_second": 217.756, "eval_steps_per_second": 27.25, "step": 4578 } ], "logging_steps": 500, "max_steps": 4578, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1242291550958640.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }