{
  "best_metric": 0.6774867177009583,
  "best_model_checkpoint": "autotrain-xt6nb-pf6ri/checkpoint-1000",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.048,
      "grad_norm": 4.167473793029785,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.0876,
      "step": 24
    },
    {
      "epoch": 0.096,
      "grad_norm": 5.14194917678833,
      "learning_rate": 1.5666666666666667e-05,
      "loss": 1.1048,
      "step": 48
    },
    {
      "epoch": 0.144,
      "grad_norm": 8.47073745727539,
      "learning_rate": 2.3666666666666668e-05,
      "loss": 1.085,
      "step": 72
    },
    {
      "epoch": 0.192,
      "grad_norm": 6.285531997680664,
      "learning_rate": 3.1e-05,
      "loss": 1.0327,
      "step": 96
    },
    {
      "epoch": 0.24,
      "grad_norm": 10.787246704101562,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 1.0035,
      "step": 120
    },
    {
      "epoch": 0.288,
      "grad_norm": 11.439258575439453,
      "learning_rate": 4.7e-05,
      "loss": 1.0177,
      "step": 144
    },
    {
      "epoch": 0.336,
      "grad_norm": 6.169108867645264,
      "learning_rate": 4.9444444444444446e-05,
      "loss": 0.8407,
      "step": 168
    },
    {
      "epoch": 0.384,
      "grad_norm": 7.220673084259033,
      "learning_rate": 4.8592592592592596e-05,
      "loss": 1.0002,
      "step": 192
    },
    {
      "epoch": 0.432,
      "grad_norm": 8.805166244506836,
      "learning_rate": 4.770370370370371e-05,
      "loss": 0.8377,
      "step": 216
    },
    {
      "epoch": 0.48,
      "grad_norm": 15.92953872680664,
      "learning_rate": 4.681481481481482e-05,
      "loss": 0.975,
      "step": 240
    },
    {
      "epoch": 0.528,
      "grad_norm": 12.280447006225586,
      "learning_rate": 4.592592592592593e-05,
      "loss": 0.8324,
      "step": 264
    },
    {
      "epoch": 0.576,
      "grad_norm": 7.390661716461182,
      "learning_rate": 4.503703703703704e-05,
      "loss": 0.8676,
      "step": 288
    },
    {
      "epoch": 0.624,
      "grad_norm": 3.326416492462158,
      "learning_rate": 4.414814814814815e-05,
      "loss": 1.0311,
      "step": 312
    },
    {
      "epoch": 0.672,
      "grad_norm": 1.4128365516662598,
      "learning_rate": 4.325925925925926e-05,
      "loss": 0.9408,
      "step": 336
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4254629611968994,
      "learning_rate": 4.237037037037037e-05,
      "loss": 0.9534,
      "step": 360
    },
    {
      "epoch": 0.768,
      "grad_norm": 5.265137195587158,
      "learning_rate": 4.148148148148148e-05,
      "loss": 0.8517,
      "step": 384
    },
    {
      "epoch": 0.816,
      "grad_norm": 139.75430297851562,
      "learning_rate": 4.059259259259259e-05,
      "loss": 0.9692,
      "step": 408
    },
    {
      "epoch": 0.864,
      "grad_norm": 172.7858123779297,
      "learning_rate": 3.97037037037037e-05,
      "loss": 0.9143,
      "step": 432
    },
    {
      "epoch": 0.912,
      "grad_norm": 42.714324951171875,
      "learning_rate": 3.885185185185186e-05,
      "loss": 0.8104,
      "step": 456
    },
    {
      "epoch": 0.96,
      "grad_norm": 7.165309906005859,
      "learning_rate": 3.7962962962962964e-05,
      "loss": 0.9425,
      "step": 480
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6906906906906907,
      "eval_f1_macro": 0.6632603879089224,
      "eval_f1_micro": 0.6906906906906907,
      "eval_f1_weighted": 0.6878712561273783,
      "eval_loss": 0.8078358769416809,
      "eval_precision_macro": 0.6746375251220212,
      "eval_precision_micro": 0.6906906906906907,
      "eval_precision_weighted": 0.6963016741978844,
      "eval_recall_macro": 0.6637361051449268,
      "eval_recall_micro": 0.6906906906906907,
      "eval_recall_weighted": 0.6906906906906907,
      "eval_runtime": 2.1598,
      "eval_samples_per_second": 462.551,
      "eval_steps_per_second": 29.17,
      "step": 500
    },
    {
      "epoch": 1.008,
      "grad_norm": 40.330535888671875,
      "learning_rate": 3.7074074074074075e-05,
      "loss": 0.8571,
      "step": 504
    },
    {
      "epoch": 1.056,
      "grad_norm": 18.710975646972656,
      "learning_rate": 3.6185185185185186e-05,
      "loss": 0.8107,
      "step": 528
    },
    {
      "epoch": 1.104,
      "grad_norm": 7.848007678985596,
      "learning_rate": 3.52962962962963e-05,
      "loss": 0.8219,
      "step": 552
    },
    {
      "epoch": 1.152,
      "grad_norm": 12.77737808227539,
      "learning_rate": 3.440740740740741e-05,
      "loss": 0.6249,
      "step": 576
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.067225456237793,
      "learning_rate": 3.351851851851852e-05,
      "loss": 0.7372,
      "step": 600
    },
    {
      "epoch": 1.248,
      "grad_norm": 22.771604537963867,
      "learning_rate": 3.262962962962963e-05,
      "loss": 0.6791,
      "step": 624
    },
    {
      "epoch": 1.296,
      "grad_norm": 32.429100036621094,
      "learning_rate": 3.174074074074074e-05,
      "loss": 0.7434,
      "step": 648
    },
    {
      "epoch": 1.3439999999999999,
      "grad_norm": 7.296426773071289,
      "learning_rate": 3.0851851851851854e-05,
      "loss": 0.7742,
      "step": 672
    },
    {
      "epoch": 1.392,
      "grad_norm": 52.18477249145508,
      "learning_rate": 2.9962962962962966e-05,
      "loss": 0.7379,
      "step": 696
    },
    {
      "epoch": 1.44,
      "grad_norm": 47.735477447509766,
      "learning_rate": 2.9074074074074077e-05,
      "loss": 0.7652,
      "step": 720
    },
    {
      "epoch": 1.488,
      "grad_norm": 214.20318603515625,
      "learning_rate": 2.8185185185185185e-05,
      "loss": 0.6941,
      "step": 744
    },
    {
      "epoch": 1.536,
      "grad_norm": 20.468931198120117,
      "learning_rate": 2.7296296296296296e-05,
      "loss": 0.7389,
      "step": 768
    },
    {
      "epoch": 1.584,
      "grad_norm": 4.2643866539001465,
      "learning_rate": 2.6407407407407408e-05,
      "loss": 0.7372,
      "step": 792
    },
    {
      "epoch": 1.6320000000000001,
      "grad_norm": 18.783554077148438,
      "learning_rate": 2.551851851851852e-05,
      "loss": 0.711,
      "step": 816
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 28.915599822998047,
      "learning_rate": 2.462962962962963e-05,
      "loss": 0.7423,
      "step": 840
    },
    {
      "epoch": 1.728,
      "grad_norm": 4.792373180389404,
      "learning_rate": 2.3740740740740742e-05,
      "loss": 0.6692,
      "step": 864
    },
    {
      "epoch": 1.776,
      "grad_norm": 19.33057975769043,
      "learning_rate": 2.2851851851851853e-05,
      "loss": 0.631,
      "step": 888
    },
    {
      "epoch": 1.8239999999999998,
      "grad_norm": 18.55846405029297,
      "learning_rate": 2.1962962962962964e-05,
      "loss": 0.6367,
      "step": 912
    },
    {
      "epoch": 1.8719999999999999,
      "grad_norm": 5.190149784088135,
      "learning_rate": 2.1074074074074072e-05,
      "loss": 0.6872,
      "step": 936
    },
    {
      "epoch": 1.92,
      "grad_norm": 38.714290618896484,
      "learning_rate": 2.0185185185185187e-05,
      "loss": 0.6252,
      "step": 960
    },
    {
      "epoch": 1.968,
      "grad_norm": 20.541481018066406,
      "learning_rate": 1.92962962962963e-05,
      "loss": 0.6303,
      "step": 984
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7177177177177178,
      "eval_f1_macro": 0.6839774417313933,
      "eval_f1_micro": 0.7177177177177178,
      "eval_f1_weighted": 0.7107988255501857,
      "eval_loss": 0.6774867177009583,
      "eval_precision_macro": 0.7089199316136637,
      "eval_precision_micro": 0.7177177177177178,
      "eval_precision_weighted": 0.7240729310198633,
      "eval_recall_macro": 0.6773815101662369,
      "eval_recall_micro": 0.7177177177177178,
      "eval_recall_weighted": 0.7177177177177178,
      "eval_runtime": 2.1078,
      "eval_samples_per_second": 473.965,
      "eval_steps_per_second": 29.89,
      "step": 1000
    }
  ],
  "logging_steps": 24,
  "max_steps": 1500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 525305938493952.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}