{ "best_metric": 0.3108769357204437, "best_model_checkpoint": "./checkpoint-xlm-v-base/checkpoint-15000", "epoch": 0.6640694173897644, "eval_steps": 1000, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 46.0612678527832, "learning_rate": 1.9873510587163855e-05, "loss": 0.943, "step": 1000 }, { "epoch": 0.04, "eval_LOC_f1": 0.6007898672649095, "eval_ORG_f1": 0.5205864729691723, "eval_PER_f1": 0.6537866457692849, "eval_loss": 0.5755352973937988, "eval_overall_accuracy": 0.813493507822672, "eval_overall_f1": 0.5959796923883924, "eval_overall_precision": 0.571592844668358, "eval_overall_recall": 0.622540194436182, "eval_runtime": 909.7639, "eval_samples_per_second": 72.217, "eval_steps_per_second": 0.282, "step": 1000 }, { "epoch": 0.09, "grad_norm": 5.543514251708984, "learning_rate": 1.974702117432771e-05, "loss": 0.5825, "step": 2000 }, { "epoch": 0.09, "eval_LOC_f1": 0.6761863812260971, "eval_ORG_f1": 0.573783382357224, "eval_PER_f1": 0.7231398018028106, "eval_loss": 0.5157074332237244, "eval_overall_accuracy": 0.844698117312631, "eval_overall_f1": 0.664661446599853, "eval_overall_precision": 0.690302943433134, "eval_overall_recall": 0.640856646367237, "eval_runtime": 884.4485, "eval_samples_per_second": 74.284, "eval_steps_per_second": 0.291, "step": 2000 }, { "epoch": 0.13, "grad_norm": 30.680952072143555, "learning_rate": 1.9620531761491565e-05, "loss": 0.5153, "step": 3000 }, { "epoch": 0.13, "eval_LOC_f1": 0.6448332585613877, "eval_ORG_f1": 0.5780655943179445, "eval_PER_f1": 0.749264457627936, "eval_loss": 0.4841216504573822, "eval_overall_accuracy": 0.8415159100197845, "eval_overall_f1": 0.6599932560127353, "eval_overall_precision": 0.6612013701212998, "eval_overall_recall": 0.6587895486638383, "eval_runtime": 887.5751, "eval_samples_per_second": 74.022, "eval_steps_per_second": 0.29, "step": 3000 }, { "epoch": 0.18, "grad_norm": 14.460062026977539, "learning_rate": 1.949404234865542e-05, "loss": 0.4744, "step": 4000 }, { "epoch": 0.18, "eval_LOC_f1": 0.7135048963789569, "eval_ORG_f1": 0.6044746860257756, "eval_PER_f1": 0.7562344421814889, "eval_loss": 0.4284209907054901, "eval_overall_accuracy": 0.8650805108611886, "eval_overall_f1": 0.6945276906141412, "eval_overall_precision": 0.6951840204528166, "eval_overall_recall": 0.6938725989010129, "eval_runtime": 884.4505, "eval_samples_per_second": 74.283, "eval_steps_per_second": 0.291, "step": 4000 }, { "epoch": 0.22, "grad_norm": 2.3655357360839844, "learning_rate": 1.9367552935819272e-05, "loss": 0.4385, "step": 5000 }, { "epoch": 0.22, "eval_LOC_f1": 0.7295629535257298, "eval_ORG_f1": 0.6200475216886777, "eval_PER_f1": 0.7408250910983861, "eval_loss": 0.4239448308944702, "eval_overall_accuracy": 0.8635053351958905, "eval_overall_f1": 0.7025319684063077, "eval_overall_precision": 0.7043199823770524, "eval_overall_recall": 0.7007530096904989, "eval_runtime": 907.5971, "eval_samples_per_second": 72.389, "eval_steps_per_second": 0.283, "step": 5000 }, { "epoch": 0.27, "grad_norm": 9.867854118347168, "learning_rate": 1.924106352298313e-05, "loss": 0.4279, "step": 6000 }, { "epoch": 0.27, "eval_LOC_f1": 0.7370099725835874, "eval_ORG_f1": 0.6403813434199981, "eval_PER_f1": 0.7776639577500056, "eval_loss": 0.38233184814453125, "eval_overall_accuracy": 0.8816618781055326, "eval_overall_f1": 0.7238370468534203, "eval_overall_precision": 0.7385234418271267, "eval_overall_recall": 0.7097233746105797, "eval_runtime": 948.2573, "eval_samples_per_second": 69.285, "eval_steps_per_second": 0.271, "step": 6000 }, { "epoch": 0.31, "grad_norm": 25.877347946166992, "learning_rate": 1.9114574110146982e-05, "loss": 0.4099, "step": 7000 }, { "epoch": 0.31, "eval_LOC_f1": 0.7575684397708062, "eval_ORG_f1": 0.630874803840732, "eval_PER_f1": 0.7838224767358626, "eval_loss": 0.38040244579315186, "eval_overall_accuracy": 0.883136741379065, "eval_overall_f1": 0.7288917006049582, "eval_overall_precision": 0.7364569017865703, "eval_overall_recall": 0.7214803450381201, "eval_runtime": 907.1265, "eval_samples_per_second": 72.427, "eval_steps_per_second": 0.283, "step": 7000 }, { "epoch": 0.35, "grad_norm": 30.637121200561523, "learning_rate": 1.8988084697310836e-05, "loss": 0.3874, "step": 8000 }, { "epoch": 0.35, "eval_LOC_f1": 0.7474734456723695, "eval_ORG_f1": 0.6407748343462335, "eval_PER_f1": 0.7854664027017585, "eval_loss": 0.37021398544311523, "eval_overall_accuracy": 0.8872800498308584, "eval_overall_f1": 0.7295473133392094, "eval_overall_precision": 0.74022719948437, "eval_overall_recall": 0.7191712196878376, "eval_runtime": 884.9913, "eval_samples_per_second": 74.238, "eval_steps_per_second": 0.29, "step": 8000 }, { "epoch": 0.4, "grad_norm": 20.109619140625, "learning_rate": 1.8861595284474693e-05, "loss": 0.3841, "step": 9000 }, { "epoch": 0.4, "eval_LOC_f1": 0.7684967782745274, "eval_ORG_f1": 0.655326947582435, "eval_PER_f1": 0.789217873159736, "eval_loss": 0.3808096945285797, "eval_overall_accuracy": 0.8879015799879489, "eval_overall_f1": 0.743771496693436, "eval_overall_precision": 0.76602787456446, "eval_overall_recall": 0.7227718897255663, "eval_runtime": 885.0879, "eval_samples_per_second": 74.23, "eval_steps_per_second": 0.29, "step": 9000 }, { "epoch": 0.44, "grad_norm": 10.265982627868652, "learning_rate": 1.8735105871638546e-05, "loss": 0.3764, "step": 10000 }, { "epoch": 0.44, "eval_LOC_f1": 0.7831821749367751, "eval_ORG_f1": 0.6622161847467495, "eval_PER_f1": 0.7948864849077164, "eval_loss": 0.34247785806655884, "eval_overall_accuracy": 0.8911041208737209, "eval_overall_f1": 0.7509862429761675, "eval_overall_precision": 0.7436605881991772, "eval_overall_recall": 0.7584576608168825, "eval_runtime": 885.3889, "eval_samples_per_second": 74.205, "eval_steps_per_second": 0.29, "step": 10000 }, { "epoch": 0.49, "grad_norm": 4.401586055755615, "learning_rate": 1.86086164588024e-05, "loss": 0.3564, "step": 11000 }, { "epoch": 0.49, "eval_LOC_f1": 0.789610444706057, "eval_ORG_f1": 0.6701892389389907, "eval_PER_f1": 0.8054954166474735, "eval_loss": 0.35062676668167114, "eval_overall_accuracy": 0.8928101093201735, "eval_overall_f1": 0.7598466310260445, "eval_overall_precision": 0.7565214692509428, "eval_overall_recall": 0.7632011522144121, "eval_runtime": 885.0018, "eval_samples_per_second": 74.237, "eval_steps_per_second": 0.29, "step": 11000 }, { "epoch": 0.53, "grad_norm": 1.0011909008026123, "learning_rate": 1.8482127045966253e-05, "loss": 0.3484, "step": 12000 }, { "epoch": 0.53, "eval_LOC_f1": 0.7712024123633622, "eval_ORG_f1": 0.6809512535185331, "eval_PER_f1": 0.8100081183474653, "eval_loss": 0.37064051628112793, "eval_overall_accuracy": 0.8851477337194005, "eval_overall_f1": 0.758193057536852, "eval_overall_precision": 0.7375241450255696, "eval_overall_recall": 0.7800538534996947, "eval_runtime": 950.4008, "eval_samples_per_second": 69.129, "eval_steps_per_second": 0.27, "step": 12000 }, { "epoch": 0.58, "grad_norm": 13.891754150390625, "learning_rate": 1.8355637633130106e-05, "loss": 0.3563, "step": 13000 }, { "epoch": 0.58, "eval_LOC_f1": 0.7934942596408595, "eval_ORG_f1": 0.6686377545091862, "eval_PER_f1": 0.8152987398240509, "eval_loss": 0.3389296531677246, "eval_overall_accuracy": 0.8935766857734662, "eval_overall_f1": 0.7638085016673694, "eval_overall_precision": 0.7483700877298401, "eval_overall_recall": 0.7798973026284891, "eval_runtime": 970.0425, "eval_samples_per_second": 67.729, "eval_steps_per_second": 0.265, "step": 13000 }, { "epoch": 0.62, "grad_norm": 19.86951446533203, "learning_rate": 1.8229148220293963e-05, "loss": 0.3396, "step": 14000 }, { "epoch": 0.62, "eval_LOC_f1": 0.7943446440452429, "eval_ORG_f1": 0.6860717813631874, "eval_PER_f1": 0.808841180333809, "eval_loss": 0.33801111578941345, "eval_overall_accuracy": 0.8965501348456104, "eval_overall_f1": 0.767065352823492, "eval_overall_precision": 0.770362767931157, "eval_overall_recall": 0.7637960455249934, "eval_runtime": 892.9487, "eval_samples_per_second": 73.576, "eval_steps_per_second": 0.288, "step": 14000 }, { "epoch": 0.66, "grad_norm": 6.008892059326172, "learning_rate": 1.8102658807457817e-05, "loss": 0.3513, "step": 15000 }, { "epoch": 0.66, "eval_LOC_f1": 0.796750172086523, "eval_ORG_f1": 0.6869723599718148, "eval_PER_f1": 0.8222321051851345, "eval_loss": 0.3108769357204437, "eval_overall_accuracy": 0.900560597156957, "eval_overall_f1": 0.773478314631055, "eval_overall_precision": 0.7637860773210824, "eval_overall_recall": 0.7834196972306151, "eval_runtime": 885.3039, "eval_samples_per_second": 74.212, "eval_steps_per_second": 0.29, "step": 15000 } ], "logging_steps": 1000, "max_steps": 158116, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 4767734068578096.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }