{ "best_metric": 0.02599843218922615, "best_model_checkpoint": "./results4/checkpoint-2400", "epoch": 4.4036697247706424, "eval_steps": 200, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3669724770642202, "grad_norm": 6.81126594543457, "learning_rate": 6.666666666666667e-07, "loss": 1.8154, "step": 200 }, { "epoch": 0.3669724770642202, "eval_accuracy": 0.12834224598930483, "eval_f1": 0.053512789620671505, "eval_loss": 1.8153961896896362, "eval_precision": 0.03873999770058728, "eval_recall": 0.12834224598930483, "eval_runtime": 8.8051, "eval_samples_per_second": 106.189, "eval_steps_per_second": 26.576, "step": 200 }, { "epoch": 0.7339449541284404, "grad_norm": 9.484223365783691, "learning_rate": 1.3333333333333334e-06, "loss": 1.7996, "step": 400 }, { "epoch": 0.7339449541284404, "eval_accuracy": 0.20962566844919786, "eval_f1": 0.1699240887979582, "eval_loss": 1.776768684387207, "eval_precision": 0.2712506960219069, "eval_recall": 0.20962566844919786, "eval_runtime": 8.617, "eval_samples_per_second": 108.506, "eval_steps_per_second": 27.155, "step": 400 }, { "epoch": 1.1009174311926606, "grad_norm": 8.83604621887207, "learning_rate": 2.0000000000000003e-06, "loss": 1.7653, "step": 600 }, { "epoch": 1.1009174311926606, "eval_accuracy": 0.35508021390374334, "eval_f1": 0.34776363638784535, "eval_loss": 1.7193909883499146, "eval_precision": 0.5237977574987844, "eval_recall": 0.35508021390374334, "eval_runtime": 8.8621, "eval_samples_per_second": 105.506, "eval_steps_per_second": 26.405, "step": 600 }, { "epoch": 1.4678899082568808, "grad_norm": 12.071432113647461, "learning_rate": 2.666666666666667e-06, "loss": 1.7051, "step": 800 }, { "epoch": 1.4678899082568808, "eval_accuracy": 0.5401069518716578, "eval_f1": 0.5429174295461449, "eval_loss": 1.6069858074188232, "eval_precision": 0.6148066517946612, "eval_recall": 0.5401069518716578, "eval_runtime": 8.6363, "eval_samples_per_second": 108.263, "eval_steps_per_second": 27.095, "step": 800 }, { "epoch": 1.834862385321101, "grad_norm": 8.39781379699707, "learning_rate": 3.3333333333333333e-06, "loss": 1.561, "step": 1000 }, { "epoch": 1.834862385321101, "eval_accuracy": 0.6181818181818182, "eval_f1": 0.599751774424811, "eval_loss": 1.3523486852645874, "eval_precision": 0.6869881919032953, "eval_recall": 0.6181818181818182, "eval_runtime": 8.7123, "eval_samples_per_second": 107.32, "eval_steps_per_second": 26.859, "step": 1000 }, { "epoch": 2.2018348623853212, "grad_norm": 7.149374008178711, "learning_rate": 4.000000000000001e-06, "loss": 1.2159, "step": 1200 }, { "epoch": 2.2018348623853212, "eval_accuracy": 0.7561497326203208, "eval_f1": 0.7396853774240922, "eval_loss": 0.8623968958854675, "eval_precision": 0.7976461918397214, "eval_recall": 0.7561497326203208, "eval_runtime": 8.4876, "eval_samples_per_second": 110.161, "eval_steps_per_second": 27.57, "step": 1200 }, { "epoch": 2.5688073394495414, "grad_norm": 12.584464073181152, "learning_rate": 4.666666666666667e-06, "loss": 0.7501, "step": 1400 }, { "epoch": 2.5688073394495414, "eval_accuracy": 0.8909090909090909, "eval_f1": 0.8893217599642673, "eval_loss": 0.43212181329727173, "eval_precision": 0.8944228004598542, "eval_recall": 0.8909090909090909, "eval_runtime": 8.5383, "eval_samples_per_second": 109.507, "eval_steps_per_second": 27.406, "step": 1400 }, { "epoch": 2.9357798165137616, "grad_norm": 6.3582305908203125, "learning_rate": 4.99209709753674e-06, "loss": 0.4346, "step": 1600 }, { "epoch": 2.9357798165137616, "eval_accuracy": 0.9401069518716577, "eval_f1": 0.939586410891439, "eval_loss": 0.20562343299388885, "eval_precision": 0.942115798236324, "eval_recall": 0.9401069518716577, "eval_runtime": 8.5478, "eval_samples_per_second": 109.384, "eval_steps_per_second": 27.375, "step": 1600 }, { "epoch": 3.302752293577982, "grad_norm": 1.9859445095062256, "learning_rate": 4.929173350101025e-06, "loss": 0.1985, "step": 1800 }, { "epoch": 3.302752293577982, "eval_accuracy": 0.9796791443850268, "eval_f1": 0.9795557753030716, "eval_loss": 0.07811883836984634, "eval_precision": 0.9796698126299838, "eval_recall": 0.9796791443850268, "eval_runtime": 8.5151, "eval_samples_per_second": 109.804, "eval_steps_per_second": 27.48, "step": 1800 }, { "epoch": 3.669724770642202, "grad_norm": 3.169071912765503, "learning_rate": 4.804914636820517e-06, "loss": 0.1066, "step": 2000 }, { "epoch": 3.669724770642202, "eval_accuracy": 0.9828877005347594, "eval_f1": 0.9828341396664676, "eval_loss": 0.05222497880458832, "eval_precision": 0.9829524348459922, "eval_recall": 0.9828877005347594, "eval_runtime": 8.3944, "eval_samples_per_second": 111.384, "eval_steps_per_second": 27.876, "step": 2000 }, { "epoch": 4.036697247706422, "grad_norm": 0.22888700664043427, "learning_rate": 4.622458405228411e-06, "loss": 0.096, "step": 2200 }, { "epoch": 4.036697247706422, "eval_accuracy": 0.986096256684492, "eval_f1": 0.9860397886588865, "eval_loss": 0.037959374487400055, "eval_precision": 0.9862011528885352, "eval_recall": 0.986096256684492, "eval_runtime": 8.8571, "eval_samples_per_second": 105.565, "eval_steps_per_second": 26.42, "step": 2200 }, { "epoch": 4.4036697247706424, "grad_norm": 0.051910221576690674, "learning_rate": 4.386411550395576e-06, "loss": 0.0686, "step": 2400 }, { "epoch": 4.4036697247706424, "eval_accuracy": 0.9925133689839573, "eval_f1": 0.9925048378298892, "eval_loss": 0.02599843218922615, "eval_precision": 0.9925302733753678, "eval_recall": 0.9925133689839573, "eval_runtime": 8.5262, "eval_samples_per_second": 109.661, "eval_steps_per_second": 27.445, "step": 2400 } ], "logging_steps": 200, "max_steps": 5450, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 396630381488796.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }