{ "best_metric": 0.9609226586529557, "best_model_checkpoint": "test/checkpoint-1000", "epoch": 19.169329073482427, "eval_steps": 100, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.2779552715654952, "eval_accuracy": 0.9816936117791817, "eval_f1": 0.9385833639194645, "eval_loss": 0.05918360874056816, "eval_precision": 0.9250824202462911, "eval_recall": 0.9524842180444199, "eval_runtime": 165.0893, "eval_samples_per_second": 2.102, "eval_steps_per_second": 1.054, "step": 100 }, { "epoch": 2.5559105431309903, "eval_accuracy": 0.9845452196763693, "eval_f1": 0.9493651456881923, "eval_loss": 0.05179167166352272, "eval_precision": 0.9371622417885823, "eval_recall": 0.9618900331766658, "eval_runtime": 165.8874, "eval_samples_per_second": 2.092, "eval_steps_per_second": 1.049, "step": 200 }, { "epoch": 3.8338658146964857, "eval_accuracy": 0.9859493975213447, "eval_f1": 0.9534633937694555, "eval_loss": 0.05195005238056183, "eval_precision": 0.9441491113968228, "eval_recall": 0.9629632829539521, "eval_runtime": 165.8659, "eval_samples_per_second": 2.092, "eval_steps_per_second": 1.049, "step": 300 }, { "epoch": 5.111821086261981, "eval_accuracy": 0.9867111571310738, "eval_f1": 0.9563248176638556, "eval_loss": 0.05341144651174545, "eval_precision": 0.9464087629117955, "eval_recall": 0.9664508647436488, "eval_runtime": 165.8661, "eval_samples_per_second": 2.092, "eval_steps_per_second": 1.049, "step": 400 }, { "epoch": 6.389776357827476, "grad_norm": 1.3225843906402588, "learning_rate": 1.3333333333333333e-05, "loss": 0.0581, "step": 500 }, { "epoch": 6.389776357827476, "eval_accuracy": 0.9881758012724539, "eval_f1": 0.9607765866129376, "eval_loss": 0.05545532703399658, "eval_precision": 0.9561740033258127, "eval_recall": 0.9654236936687863, "eval_runtime": 166.1394, "eval_samples_per_second": 2.089, "eval_steps_per_second": 1.047, "step": 500 }, { "epoch": 7.667731629392971, "eval_accuracy": 0.9876475167880766, "eval_f1": 0.9593047596775092, "eval_loss": 0.05594188719987869, "eval_precision": 0.9519859864988651, "eval_recall": 0.9667369366878629, "eval_runtime": 165.0742, "eval_samples_per_second": 2.102, "eval_steps_per_second": 1.054, "step": 600 }, { "epoch": 8.945686900958467, "eval_accuracy": 0.9861775203668713, "eval_f1": 0.9541556967684495, "eval_loss": 0.06466494500637054, "eval_precision": 0.9495223935082022, "eval_recall": 0.9588344392221915, "eval_runtime": 165.512, "eval_samples_per_second": 2.097, "eval_steps_per_second": 1.051, "step": 700 }, { "epoch": 10.223642172523961, "eval_accuracy": 0.987931766243663, "eval_f1": 0.9600553547912903, "eval_loss": 0.07039085775613785, "eval_precision": 0.9571241565658107, "eval_recall": 0.9630045617915399, "eval_runtime": 165.1633, "eval_samples_per_second": 2.101, "eval_steps_per_second": 1.054, "step": 800 }, { "epoch": 11.501597444089457, "eval_accuracy": 0.9873081533159109, "eval_f1": 0.957732138438941, "eval_loss": 0.072791688144207, "eval_precision": 0.9476758884291113, "eval_recall": 0.9680041010045157, "eval_runtime": 166.1498, "eval_samples_per_second": 2.088, "eval_steps_per_second": 1.047, "step": 900 }, { "epoch": 12.779552715654953, "grad_norm": 1.008128046989441, "learning_rate": 6.666666666666667e-06, "loss": 0.0113, "step": 1000 }, { "epoch": 12.779552715654953, "eval_accuracy": 0.9883580680989368, "eval_f1": 0.9609226586529557, "eval_loss": 0.070257768034935, "eval_precision": 0.9555934230761928, "eval_recall": 0.9663116686634105, "eval_runtime": 165.5164, "eval_samples_per_second": 2.096, "eval_steps_per_second": 1.051, "step": 1000 }, { "epoch": 14.057507987220447, "eval_accuracy": 0.9876378248219065, "eval_f1": 0.9585960793498106, "eval_loss": 0.07850438356399536, "eval_precision": 0.9469678165075398, "eval_recall": 0.9705134703406753, "eval_runtime": 165.1708, "eval_samples_per_second": 2.101, "eval_steps_per_second": 1.053, "step": 1100 }, { "epoch": 15.335463258785943, "eval_accuracy": 0.9870376462004166, "eval_f1": 0.9562580148084375, "eval_loss": 0.0813131257891655, "eval_precision": 0.9487462712234356, "eval_recall": 0.9638896568672626, "eval_runtime": 165.2596, "eval_samples_per_second": 2.1, "eval_steps_per_second": 1.053, "step": 1200 }, { "epoch": 16.61341853035144, "eval_accuracy": 0.9879028350013641, "eval_f1": 0.9592724777593, "eval_loss": 0.0847664400935173, "eval_precision": 0.9523574792749595, "eval_recall": 0.9662886293121986, "eval_runtime": 166.5822, "eval_samples_per_second": 2.083, "eval_steps_per_second": 1.045, "step": 1300 }, { "epoch": 17.891373801916934, "eval_accuracy": 0.9879352379927389, "eval_f1": 0.9593296707640014, "eval_loss": 0.08636707812547684, "eval_precision": 0.9518098587623206, "eval_recall": 0.9669692501459158, "eval_runtime": 165.8109, "eval_samples_per_second": 2.093, "eval_steps_per_second": 1.049, "step": 1400 }, { "epoch": 19.169329073482427, "grad_norm": 0.3258835971355438, "learning_rate": 0.0, "loss": 0.0039, "step": 1500 }, { "epoch": 19.169329073482427, "eval_accuracy": 0.98795664711204, "eval_f1": 0.959393611777229, "eval_loss": 0.08625596016645432, "eval_precision": 0.9520241523929792, "eval_recall": 0.9668780527140356, "eval_runtime": 164.9718, "eval_samples_per_second": 2.103, "eval_steps_per_second": 1.055, "step": 1500 } ], "logging_steps": 500, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3163207421952000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }