{
  "best_metric": 0.20468038320541382,
  "best_model_checkpoint": "autotrain-o5mxr-bsop6/checkpoint-753",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 753,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.099601593625498,
      "grad_norm": 5.523846626281738,
      "learning_rate": 1.6447368421052635e-05,
      "loss": 1.3608,
      "step": 25
    },
    {
      "epoch": 0.199203187250996,
      "grad_norm": 4.111423969268799,
      "learning_rate": 3.289473684210527e-05,
      "loss": 1.0428,
      "step": 50
    },
    {
      "epoch": 0.29880478087649404,
      "grad_norm": 5.932701110839844,
      "learning_rate": 4.9342105263157894e-05,
      "loss": 0.824,
      "step": 75
    },
    {
      "epoch": 0.398406374501992,
      "grad_norm": 14.16284465789795,
      "learning_rate": 4.82274741506647e-05,
      "loss": 0.8565,
      "step": 100
    },
    {
      "epoch": 0.49800796812749004,
      "grad_norm": 7.437353610992432,
      "learning_rate": 4.638109305760709e-05,
      "loss": 0.7849,
      "step": 125
    },
    {
      "epoch": 0.5976095617529881,
      "grad_norm": 14.002378463745117,
      "learning_rate": 4.453471196454949e-05,
      "loss": 0.6819,
      "step": 150
    },
    {
      "epoch": 0.6972111553784861,
      "grad_norm": 7.128929138183594,
      "learning_rate": 4.2688330871491875e-05,
      "loss": 0.9145,
      "step": 175
    },
    {
      "epoch": 0.796812749003984,
      "grad_norm": 7.11976432800293,
      "learning_rate": 4.084194977843427e-05,
      "loss": 0.7596,
      "step": 200
    },
    {
      "epoch": 0.896414342629482,
      "grad_norm": 10.390266418457031,
      "learning_rate": 3.8995568685376664e-05,
      "loss": 0.704,
      "step": 225
    },
    {
      "epoch": 0.9960159362549801,
      "grad_norm": 8.346457481384277,
      "learning_rate": 3.714918759231906e-05,
      "loss": 0.6437,
      "step": 250
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8041958041958042,
      "eval_f1_macro": 0.36845586105500655,
      "eval_f1_micro": 0.8041958041958042,
      "eval_f1_weighted": 0.7771738216042354,
      "eval_loss": 0.5281614065170288,
      "eval_precision_macro": 0.4761472822311331,
      "eval_precision_micro": 0.8041958041958042,
      "eval_precision_weighted": 0.80077693322795,
      "eval_recall_macro": 0.36810085833863637,
      "eval_recall_micro": 0.8041958041958042,
      "eval_recall_weighted": 0.8041958041958042,
      "eval_runtime": 359.8505,
      "eval_samples_per_second": 5.563,
      "eval_steps_per_second": 0.35,
      "step": 251
    },
    {
      "epoch": 1.095617529880478,
      "grad_norm": 4.572272300720215,
      "learning_rate": 3.5302806499261446e-05,
      "loss": 0.6044,
      "step": 275
    },
    {
      "epoch": 1.1952191235059761,
      "grad_norm": 3.9493234157562256,
      "learning_rate": 3.345642540620384e-05,
      "loss": 0.6022,
      "step": 300
    },
    {
      "epoch": 1.294820717131474,
      "grad_norm": 6.116109848022461,
      "learning_rate": 3.1610044313146235e-05,
      "loss": 0.494,
      "step": 325
    },
    {
      "epoch": 1.3944223107569722,
      "grad_norm": 11.72847843170166,
      "learning_rate": 2.976366322008863e-05,
      "loss": 0.5509,
      "step": 350
    },
    {
      "epoch": 1.4940239043824701,
      "grad_norm": 6.265529632568359,
      "learning_rate": 2.791728212703102e-05,
      "loss": 0.5268,
      "step": 375
    },
    {
      "epoch": 1.593625498007968,
      "grad_norm": 5.475246906280518,
      "learning_rate": 2.6070901033973415e-05,
      "loss": 0.6499,
      "step": 400
    },
    {
      "epoch": 1.6932270916334662,
      "grad_norm": 5.1211256980896,
      "learning_rate": 2.4224519940915806e-05,
      "loss": 0.5373,
      "step": 425
    },
    {
      "epoch": 1.792828685258964,
      "grad_norm": 2.6638262271881104,
      "learning_rate": 2.23781388478582e-05,
      "loss": 0.4476,
      "step": 450
    },
    {
      "epoch": 1.8924302788844622,
      "grad_norm": 45.47649383544922,
      "learning_rate": 2.053175775480059e-05,
      "loss": 0.5096,
      "step": 475
    },
    {
      "epoch": 1.9920318725099602,
      "grad_norm": 6.048641204833984,
      "learning_rate": 1.8685376661742986e-05,
      "loss": 0.424,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9100899100899101,
      "eval_f1_macro": 0.4902522309112827,
      "eval_f1_micro": 0.9100899100899101,
      "eval_f1_weighted": 0.9024494723879539,
      "eval_loss": 0.2924034893512726,
      "eval_precision_macro": 0.5296504253394911,
      "eval_precision_micro": 0.9100899100899101,
      "eval_precision_weighted": 0.9052689915360643,
      "eval_recall_macro": 0.4726016384805666,
      "eval_recall_micro": 0.9100899100899101,
      "eval_recall_weighted": 0.9100899100899101,
      "eval_runtime": 365.8456,
      "eval_samples_per_second": 5.472,
      "eval_steps_per_second": 0.344,
      "step": 502
    },
    {
      "epoch": 2.091633466135458,
      "grad_norm": 2.1055290699005127,
      "learning_rate": 1.6838995568685377e-05,
      "loss": 0.4131,
      "step": 525
    },
    {
      "epoch": 2.191235059760956,
      "grad_norm": 22.558122634887695,
      "learning_rate": 1.499261447562777e-05,
      "loss": 0.3949,
      "step": 550
    },
    {
      "epoch": 2.2908366533864544,
      "grad_norm": 9.2990083694458,
      "learning_rate": 1.3146233382570164e-05,
      "loss": 0.2408,
      "step": 575
    },
    {
      "epoch": 2.3904382470119523,
      "grad_norm": 23.017541885375977,
      "learning_rate": 1.1299852289512555e-05,
      "loss": 0.2457,
      "step": 600
    },
    {
      "epoch": 2.49003984063745,
      "grad_norm": 0.9120715260505676,
      "learning_rate": 9.45347119645495e-06,
      "loss": 0.3485,
      "step": 625
    },
    {
      "epoch": 2.589641434262948,
      "grad_norm": 2.4525136947631836,
      "learning_rate": 7.607090103397342e-06,
      "loss": 0.2528,
      "step": 650
    },
    {
      "epoch": 2.6892430278884465,
      "grad_norm": 10.707359313964844,
      "learning_rate": 5.760709010339734e-06,
      "loss": 0.298,
      "step": 675
    },
    {
      "epoch": 2.7888446215139444,
      "grad_norm": 24.448265075683594,
      "learning_rate": 3.914327917282127e-06,
      "loss": 0.2448,
      "step": 700
    },
    {
      "epoch": 2.8884462151394423,
      "grad_norm": 1.235875129699707,
      "learning_rate": 2.06794682422452e-06,
      "loss": 0.4039,
      "step": 725
    },
    {
      "epoch": 2.9880478087649402,
      "grad_norm": 9.670615196228027,
      "learning_rate": 2.215657311669129e-07,
      "loss": 0.3307,
      "step": 750
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9440559440559441,
      "eval_f1_macro": 0.5346970986126391,
      "eval_f1_micro": 0.9440559440559441,
      "eval_f1_weighted": 0.9410445215583293,
      "eval_loss": 0.20468038320541382,
      "eval_precision_macro": 0.5573908674461452,
      "eval_precision_micro": 0.9440559440559441,
      "eval_precision_weighted": 0.9419302500177272,
      "eval_recall_macro": 0.5198802592658838,
      "eval_recall_micro": 0.9440559440559441,
      "eval_recall_weighted": 0.9440559440559441,
      "eval_runtime": 338.5361,
      "eval_samples_per_second": 5.914,
      "eval_steps_per_second": 0.372,
      "step": 753
    }
  ],
  "logging_steps": 25,
  "max_steps": 753,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 395071890909696.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}