{ "best_metric": 0.326568067073822, "best_model_checkpoint": "sep-20/checkpoint-184", "epoch": 1.0, "eval_steps": 500, "global_step": 184, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04891304347826087, "grad_norm": 6.508527755737305, "learning_rate": 2.368421052631579e-05, "loss": 0.9057, "step": 9 }, { "epoch": 0.09782608695652174, "grad_norm": 6.560257434844971, "learning_rate": 4.736842105263158e-05, "loss": 0.4782, "step": 18 }, { "epoch": 0.14673913043478262, "grad_norm": 5.121021270751953, "learning_rate": 4.7575757575757576e-05, "loss": 0.3964, "step": 27 }, { "epoch": 0.1956521739130435, "grad_norm": 5.416026592254639, "learning_rate": 4.484848484848485e-05, "loss": 0.461, "step": 36 }, { "epoch": 0.24456521739130435, "grad_norm": 3.697016716003418, "learning_rate": 4.212121212121212e-05, "loss": 0.6009, "step": 45 }, { "epoch": 0.29347826086956524, "grad_norm": 3.856910467147827, "learning_rate": 3.939393939393939e-05, "loss": 0.2506, "step": 54 }, { "epoch": 0.3423913043478261, "grad_norm": 14.140103340148926, "learning_rate": 3.6666666666666666e-05, "loss": 0.396, "step": 63 }, { "epoch": 0.391304347826087, "grad_norm": 4.570571422576904, "learning_rate": 3.3939393939393945e-05, "loss": 0.3124, "step": 72 }, { "epoch": 0.44021739130434784, "grad_norm": 11.288423538208008, "learning_rate": 3.121212121212122e-05, "loss": 0.5017, "step": 81 }, { "epoch": 0.4891304347826087, "grad_norm": 10.446467399597168, "learning_rate": 2.8484848484848486e-05, "loss": 0.2368, "step": 90 }, { "epoch": 0.5380434782608695, "grad_norm": 1.5973994731903076, "learning_rate": 2.575757575757576e-05, "loss": 0.2517, "step": 99 }, { "epoch": 0.5869565217391305, "grad_norm": 0.7517912983894348, "learning_rate": 2.3030303030303034e-05, "loss": 0.571, "step": 108 }, { "epoch": 0.6358695652173914, "grad_norm": 7.41497278213501, "learning_rate": 2.0303030303030303e-05, "loss": 0.2846, "step": 117 }, { "epoch": 0.6847826086956522, "grad_norm": 4.097425937652588, "learning_rate": 1.7575757575757576e-05, "loss": 0.4801, "step": 126 }, { "epoch": 0.7336956521739131, "grad_norm": 4.8678669929504395, "learning_rate": 1.484848484848485e-05, "loss": 0.3254, "step": 135 }, { "epoch": 0.782608695652174, "grad_norm": 0.7251734733581543, "learning_rate": 1.2121212121212122e-05, "loss": 0.1278, "step": 144 }, { "epoch": 0.8315217391304348, "grad_norm": 10.660299301147461, "learning_rate": 9.393939393939394e-06, "loss": 0.5605, "step": 153 }, { "epoch": 0.8804347826086957, "grad_norm": 0.7391616702079773, "learning_rate": 6.666666666666667e-06, "loss": 0.3178, "step": 162 }, { "epoch": 0.9293478260869565, "grad_norm": 0.3952378034591675, "learning_rate": 3.939393939393939e-06, "loss": 0.3842, "step": 171 }, { "epoch": 0.9782608695652174, "grad_norm": 5.111515522003174, "learning_rate": 1.2121212121212122e-06, "loss": 0.2226, "step": 180 }, { "epoch": 1.0, "eval_accuracy": 0.8994565217391305, "eval_f1_macro": 0.574992771283407, "eval_f1_micro": 0.8994565217391305, "eval_f1_weighted": 0.8962362631935303, "eval_loss": 0.326568067073822, "eval_precision_macro": 0.5852941598199051, "eval_precision_micro": 0.8994565217391305, "eval_precision_weighted": 0.8948775064010129, "eval_recall_macro": 0.566717099325795, "eval_recall_micro": 0.8994565217391305, "eval_recall_weighted": 0.8994565217391305, "eval_runtime": 108.1675, "eval_samples_per_second": 3.402, "eval_steps_per_second": 0.213, "step": 184 } ], "logging_steps": 9, "max_steps": 184, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.138367530175017e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }