{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 6120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7352941176470589, "grad_norm": 1.4399138689041138, "learning_rate": 1.8366013071895427e-05, "loss": 0.1305, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.9775613095979087, "eval_f1": 0.740566037735849, "eval_loss": 0.06862938404083252, "eval_precision": 0.6908690869086909, "eval_recall": 0.7979669631512071, "eval_runtime": 2.5207, "eval_samples_per_second": 366.567, "eval_steps_per_second": 46.019, "step": 680 }, { "epoch": 1.4705882352941178, "grad_norm": 3.059645652770996, "learning_rate": 1.6732026143790852e-05, "loss": 0.0475, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.9815448773808042, "eval_f1": 0.8041237113402061, "eval_loss": 0.067368283867836, "eval_precision": 0.7691415313225058, "eval_recall": 0.8424396442185514, "eval_runtime": 3.126, "eval_samples_per_second": 295.582, "eval_steps_per_second": 37.108, "step": 1360 }, { "epoch": 2.2058823529411766, "grad_norm": 2.6966614723205566, "learning_rate": 1.5098039215686276e-05, "loss": 0.0354, "step": 1500 }, { "epoch": 2.9411764705882355, "grad_norm": 3.640780210494995, "learning_rate": 1.3464052287581701e-05, "loss": 0.0222, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.982322917963401, "eval_f1": 0.8089053803339517, "eval_loss": 0.07790510356426239, "eval_precision": 0.7879518072289157, "eval_recall": 0.8310038119440915, "eval_runtime": 4.1074, "eval_samples_per_second": 224.961, "eval_steps_per_second": 28.242, "step": 2040 }, { "epoch": 3.6764705882352944, "grad_norm": 0.008734635077416897, "learning_rate": 1.1830065359477125e-05, "loss": 0.012, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.982789742312959, "eval_f1": 0.8168498168498168, "eval_loss": 0.0823572650551796, "eval_precision": 0.7861339600470035, "eval_recall": 0.8500635324015248, "eval_runtime": 2.5823, "eval_samples_per_second": 357.827, "eval_steps_per_second": 44.922, "step": 2720 }, { "epoch": 4.411764705882353, "grad_norm": 0.3800855576992035, "learning_rate": 1.0196078431372549e-05, "loss": 0.0081, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.9815137557575003, "eval_f1": 0.8112745098039216, "eval_loss": 0.10165887326002121, "eval_precision": 0.7834319526627219, "eval_recall": 0.841168996188056, "eval_runtime": 2.596, "eval_samples_per_second": 355.931, "eval_steps_per_second": 44.684, "step": 3400 }, { "epoch": 5.147058823529412, "grad_norm": 1.0274019241333008, "learning_rate": 8.562091503267974e-06, "loss": 0.0054, "step": 3500 }, { "epoch": 5.882352941176471, "grad_norm": 0.9368045926094055, "learning_rate": 6.928104575163399e-06, "loss": 0.0028, "step": 4000 }, { "epoch": 6.0, "eval_accuracy": 0.981607120627412, "eval_f1": 0.8175092478421702, "eval_loss": 0.11204753071069717, "eval_precision": 0.7940119760479042, "eval_recall": 0.8424396442185514, "eval_runtime": 2.6214, "eval_samples_per_second": 352.478, "eval_steps_per_second": 44.25, "step": 4080 }, { "epoch": 6.617647058823529, "grad_norm": 0.0018368919845670462, "learning_rate": 5.294117647058824e-06, "loss": 0.0021, "step": 4500 }, { "epoch": 7.0, "eval_accuracy": 0.9808290800448152, "eval_f1": 0.8186215235792019, "eval_loss": 0.12193118035793304, "eval_precision": 0.7808535178777394, "eval_recall": 0.8602287166454892, "eval_runtime": 2.6902, "eval_samples_per_second": 343.468, "eval_steps_per_second": 43.119, "step": 4760 }, { "epoch": 7.352941176470588, "grad_norm": 0.004133788403123617, "learning_rate": 3.6601307189542484e-06, "loss": 0.0011, "step": 5000 }, { "epoch": 8.0, "eval_accuracy": 0.9814826341341965, "eval_f1": 0.8152173913043478, "eval_loss": 0.12062280625104904, "eval_precision": 0.7767548906789413, "eval_recall": 0.8576874205844981, "eval_runtime": 2.6543, "eval_samples_per_second": 348.116, "eval_steps_per_second": 43.703, "step": 5440 }, { "epoch": 8.088235294117647, "grad_norm": 0.00526324100792408, "learning_rate": 2.0261437908496734e-06, "loss": 0.0018, "step": 5500 }, { "epoch": 8.823529411764707, "grad_norm": 0.01854279637336731, "learning_rate": 3.921568627450981e-07, "loss": 0.0005, "step": 6000 }, { "epoch": 9.0, "eval_accuracy": 0.9821050666002739, "eval_f1": 0.8220858895705522, "eval_loss": 0.1177670955657959, "eval_precision": 0.7947805456702254, "eval_recall": 0.8513341804320204, "eval_runtime": 2.7389, "eval_samples_per_second": 337.365, "eval_steps_per_second": 42.353, "step": 6120 } ], "logging_steps": 500, "max_steps": 6120, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 791914496183100.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }