{ "best_metric": 0.05933361500501633, "best_model_checkpoint": "runs/roberta-base-500000-samples-512-max-len-64-train-batch-size-8-test-batch-size-3-epochs-1e-05-lr-0.1-warmup-ratio/checkpoint-12000", "epoch": 2.1331058020477816, "eval_steps": 1500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.4971, "eval_f1": 0.6639222657346396, "eval_loss": 0.6927798390388489, "eval_precision": 0.4973866025833584, "eval_recall": 0.9981112361356695, "eval_runtime": 372.4207, "eval_samples_per_second": 134.257, "eval_steps_per_second": 16.782, "step": 0 }, { "epoch": 0.07110352673492605, "grad_norm": 11.443495750427246, "learning_rate": 2.369668246445498e-06, "loss": 0.4665, "step": 500 }, { "epoch": 0.1422070534698521, "grad_norm": 5.395324230194092, "learning_rate": 4.739336492890996e-06, "loss": 0.1226, "step": 1000 }, { "epoch": 0.21331058020477817, "grad_norm": 3.713897705078125, "learning_rate": 7.1090047393364935e-06, "loss": 0.1013, "step": 1500 }, { "epoch": 0.21331058020477817, "eval_accuracy": 0.96946, "eval_f1": 0.9694067677759302, "eval_loss": 0.08184666186571121, "eval_precision": 0.9665987454552719, "eval_recall": 0.9722311525478219, "eval_runtime": 369.1947, "eval_samples_per_second": 135.43, "eval_steps_per_second": 16.929, "step": 1500 }, { "epoch": 0.2844141069397042, "grad_norm": 22.85436248779297, "learning_rate": 9.478672985781992e-06, "loss": 0.0911, "step": 2000 }, { "epoch": 0.35551763367463024, "grad_norm": 2.183819532394409, "learning_rate": 9.794585484040872e-06, "loss": 0.0904, "step": 2500 }, { "epoch": 0.42662116040955633, "grad_norm": 3.5085792541503906, "learning_rate": 9.53123354050353e-06, "loss": 0.0853, "step": 3000 }, { "epoch": 0.42662116040955633, "eval_accuracy": 0.97452, "eval_f1": 0.97444332998997, "eval_loss": 0.06914982199668884, "eval_precision": 0.9728430665705359, "eval_recall": 0.9760488667416815, "eval_runtime": 369.1227, "eval_samples_per_second": 135.456, "eval_steps_per_second": 16.932, "step": 3000 }, { "epoch": 0.49772468714448237, "grad_norm": 6.5320234298706055, "learning_rate": 9.267881596966186e-06, "loss": 0.0799, "step": 3500 }, { "epoch": 0.5688282138794084, "grad_norm": 1.3349543809890747, "learning_rate": 9.004529653428843e-06, "loss": 0.0751, "step": 4000 }, { "epoch": 0.6399317406143344, "grad_norm": 1.4865925312042236, "learning_rate": 8.7411777098915e-06, "loss": 0.0742, "step": 4500 }, { "epoch": 0.6399317406143344, "eval_accuracy": 0.97572, "eval_f1": 0.9756136756257282, "eval_loss": 0.06528624147176743, "eval_precision": 0.9753393846895333, "eval_recall": 0.9758881208808873, "eval_runtime": 369.1379, "eval_samples_per_second": 135.451, "eval_steps_per_second": 16.931, "step": 4500 }, { "epoch": 0.7110352673492605, "grad_norm": 1.9182387590408325, "learning_rate": 8.477825766354156e-06, "loss": 0.0761, "step": 5000 }, { "epoch": 0.7821387940841866, "grad_norm": 4.340898036956787, "learning_rate": 8.214473822816812e-06, "loss": 0.0742, "step": 5500 }, { "epoch": 0.8532423208191127, "grad_norm": 4.503939151763916, "learning_rate": 7.95112187927947e-06, "loss": 0.0722, "step": 6000 }, { "epoch": 0.8532423208191127, "eval_accuracy": 0.97488, "eval_f1": 0.9744964262508122, "eval_loss": 0.06521258503198624, "eval_precision": 0.9848957478246594, "eval_recall": 0.9643144189037133, "eval_runtime": 369.1106, "eval_samples_per_second": 135.461, "eval_steps_per_second": 16.933, "step": 6000 }, { "epoch": 0.9243458475540387, "grad_norm": 5.767714977264404, "learning_rate": 7.687769935742126e-06, "loss": 0.0664, "step": 6500 }, { "epoch": 0.9954493742889647, "grad_norm": 1.0236719846725464, "learning_rate": 7.424417992204783e-06, "loss": 0.0687, "step": 7000 }, { "epoch": 1.0665529010238908, "grad_norm": 5.575131416320801, "learning_rate": 7.1610660486674395e-06, "loss": 0.0594, "step": 7500 }, { "epoch": 1.0665529010238908, "eval_accuracy": 0.9734, "eval_f1": 0.9729013854930725, "eval_loss": 0.07639238238334656, "eval_precision": 0.9867333443544387, "eval_recall": 0.9594518566146921, "eval_runtime": 369.1258, "eval_samples_per_second": 135.455, "eval_steps_per_second": 16.932, "step": 7500 }, { "epoch": 1.1376564277588168, "grad_norm": 1.4480912685394287, "learning_rate": 6.8977141051300965e-06, "loss": 0.0613, "step": 8000 }, { "epoch": 1.2087599544937428, "grad_norm": 14.01652717590332, "learning_rate": 6.6343621615927535e-06, "loss": 0.0626, "step": 8500 }, { "epoch": 1.2798634812286689, "grad_norm": 2.645029067993164, "learning_rate": 6.3710102180554104e-06, "loss": 0.0595, "step": 9000 }, { "epoch": 1.2798634812286689, "eval_accuracy": 0.97674, "eval_f1": 0.9764427069618586, "eval_loss": 0.0677267462015152, "eval_precision": 0.9843986113947315, "eval_recall": 0.968614370679955, "eval_runtime": 369.1238, "eval_samples_per_second": 135.456, "eval_steps_per_second": 16.932, "step": 9000 }, { "epoch": 1.350967007963595, "grad_norm": 4.5592122077941895, "learning_rate": 6.1076582745180666e-06, "loss": 0.0618, "step": 9500 }, { "epoch": 1.4220705346985212, "grad_norm": 5.417106628417969, "learning_rate": 5.8443063309807235e-06, "loss": 0.058, "step": 10000 }, { "epoch": 1.493174061433447, "grad_norm": 1.136661171913147, "learning_rate": 5.5809543874433805e-06, "loss": 0.0542, "step": 10500 }, { "epoch": 1.493174061433447, "eval_accuracy": 0.97848, "eval_f1": 0.9783422567529487, "eval_loss": 0.06500901281833649, "eval_precision": 0.9800387127994193, "eval_recall": 0.9766516637196592, "eval_runtime": 369.2146, "eval_samples_per_second": 135.423, "eval_steps_per_second": 16.928, "step": 10500 }, { "epoch": 1.5642775881683733, "grad_norm": 2.5331344604492188, "learning_rate": 5.317602443906037e-06, "loss": 0.0623, "step": 11000 }, { "epoch": 1.635381114903299, "grad_norm": 2.5099124908447266, "learning_rate": 5.054250500368693e-06, "loss": 0.0617, "step": 11500 }, { "epoch": 1.7064846416382253, "grad_norm": 0.18802767992019653, "learning_rate": 4.79089855683135e-06, "loss": 0.0571, "step": 12000 }, { "epoch": 1.7064846416382253, "eval_accuracy": 0.97944, "eval_f1": 0.9793847511330366, "eval_loss": 0.05933361500501633, "eval_precision": 0.9774637739172204, "eval_recall": 0.9813132936826877, "eval_runtime": 369.1545, "eval_samples_per_second": 135.445, "eval_steps_per_second": 16.931, "step": 12000 }, { "epoch": 1.7775881683731511, "grad_norm": 0.17306402325630188, "learning_rate": 4.527546613294007e-06, "loss": 0.0575, "step": 12500 }, { "epoch": 1.8486916951080774, "grad_norm": 2.0170910358428955, "learning_rate": 4.264194669756664e-06, "loss": 0.0573, "step": 13000 }, { "epoch": 1.9197952218430034, "grad_norm": 1.0754927396774292, "learning_rate": 4.00084272621932e-06, "loss": 0.0562, "step": 13500 }, { "epoch": 1.9197952218430034, "eval_accuracy": 0.9793, "eval_f1": 0.9792272955343703, "eval_loss": 0.05992409214377403, "eval_precision": 0.9781083356721864, "eval_recall": 0.9803488185179232, "eval_runtime": 369.2584, "eval_samples_per_second": 135.407, "eval_steps_per_second": 16.926, "step": 13500 }, { "epoch": 1.9908987485779295, "grad_norm": 0.5176452398300171, "learning_rate": 3.7374907826819767e-06, "loss": 0.0553, "step": 14000 }, { "epoch": 2.0620022753128557, "grad_norm": 3.9174857139587402, "learning_rate": 3.474138839144633e-06, "loss": 0.0506, "step": 14500 }, { "epoch": 2.1331058020477816, "grad_norm": 2.6643998622894287, "learning_rate": 3.21078689560729e-06, "loss": 0.0463, "step": 15000 }, { "epoch": 2.1331058020477816, "eval_accuracy": 0.97976, "eval_f1": 0.9796746334605343, "eval_loss": 0.05948900803923607, "eval_precision": 0.9792419497309885, "eval_recall": 0.9801076997267321, "eval_runtime": 369.2026, "eval_samples_per_second": 135.427, "eval_steps_per_second": 16.928, "step": 15000 } ], "logging_steps": 500, "max_steps": 21096, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5256135448428544e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }