|
{ |
|
"best_metric": 48.71262499235238, |
|
"best_model_checkpoint": "/root/turkic_qa/tr_kaz_models/orig_kaz_roberta_base_model/checkpoint-5823", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 6470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 647, |
|
"train_exact_match": 3.5964035964035963, |
|
"train_f1": 11.56795258837141, |
|
"train_runtime": 11.3677, |
|
"train_samples_per_second": 115.503, |
|
"train_steps_per_second": 4.135 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 11.74316120147705, |
|
"learning_rate": 5e-06, |
|
"loss": 4.6868, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 4.4375, |
|
"eval_f1": 11.321307166156533, |
|
"eval_runtime": 35.34, |
|
"eval_samples_per_second": 113.384, |
|
"eval_steps_per_second": 4.075, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1294, |
|
"train_exact_match": 11.688311688311689, |
|
"train_f1": 19.152344840271617, |
|
"train_runtime": 11.2191, |
|
"train_samples_per_second": 112.844, |
|
"train_steps_per_second": 4.1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 15.642627716064453, |
|
"learning_rate": 1e-05, |
|
"loss": 3.5751, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 9.6875, |
|
"eval_f1": 17.179159683522684, |
|
"eval_runtime": 35.4377, |
|
"eval_samples_per_second": 113.072, |
|
"eval_steps_per_second": 4.063, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1941, |
|
"train_exact_match": 27.47252747252747, |
|
"train_f1": 39.09864539474671, |
|
"train_runtime": 11.4784, |
|
"train_samples_per_second": 112.646, |
|
"train_steps_per_second": 4.095 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 15.969060897827148, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 3.0602, |
|
"step": 1941 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 20.625, |
|
"eval_f1": 31.5365409883315, |
|
"eval_runtime": 35.4806, |
|
"eval_samples_per_second": 112.935, |
|
"eval_steps_per_second": 4.059, |
|
"step": 1941 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2588, |
|
"train_exact_match": 37.46253746253746, |
|
"train_f1": 50.75619914926189, |
|
"train_runtime": 11.2214, |
|
"train_samples_per_second": 114.513, |
|
"train_steps_per_second": 4.099 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 23.334590911865234, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.5025, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 28.5, |
|
"eval_f1": 42.64759962559622, |
|
"eval_runtime": 35.4891, |
|
"eval_samples_per_second": 112.908, |
|
"eval_steps_per_second": 4.058, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3235, |
|
"train_exact_match": 47.55244755244755, |
|
"train_f1": 60.596926593269394, |
|
"train_runtime": 11.4376, |
|
"train_samples_per_second": 113.922, |
|
"train_steps_per_second": 4.109 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 26.927326202392578, |
|
"learning_rate": 6.25e-06, |
|
"loss": 2.1403, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 30.9375, |
|
"eval_f1": 45.23519557953087, |
|
"eval_runtime": 35.5162, |
|
"eval_samples_per_second": 112.822, |
|
"eval_steps_per_second": 4.054, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3882, |
|
"train_exact_match": 53.04695304695305, |
|
"train_f1": 65.4389894343176, |
|
"train_runtime": 11.2606, |
|
"train_samples_per_second": 114.381, |
|
"train_steps_per_second": 4.085 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 19.14756965637207, |
|
"learning_rate": 5e-06, |
|
"loss": 1.9037, |
|
"step": 3882 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 32.46875, |
|
"eval_f1": 47.29669704036703, |
|
"eval_runtime": 35.4202, |
|
"eval_samples_per_second": 113.127, |
|
"eval_steps_per_second": 4.065, |
|
"step": 3882 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 4529, |
|
"train_exact_match": 57.642357642357645, |
|
"train_f1": 69.53035836992328, |
|
"train_runtime": 11.2232, |
|
"train_samples_per_second": 112.802, |
|
"train_steps_per_second": 4.099 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 24.628639221191406, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 1.7438, |
|
"step": 4529 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 33.15625, |
|
"eval_f1": 47.60121866597253, |
|
"eval_runtime": 35.4127, |
|
"eval_samples_per_second": 113.152, |
|
"eval_steps_per_second": 4.066, |
|
"step": 4529 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 5176, |
|
"train_exact_match": 56.54345654345654, |
|
"train_f1": 68.42451603920425, |
|
"train_runtime": 11.6061, |
|
"train_samples_per_second": 113.044, |
|
"train_steps_per_second": 4.05 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 25.127649307250977, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.6205, |
|
"step": 5176 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 33.625, |
|
"eval_f1": 48.59237042611232, |
|
"eval_runtime": 35.4006, |
|
"eval_samples_per_second": 113.19, |
|
"eval_steps_per_second": 4.068, |
|
"step": 5176 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 5823, |
|
"train_exact_match": 60.53946053946054, |
|
"train_f1": 71.41358726367281, |
|
"train_runtime": 11.2378, |
|
"train_samples_per_second": 113.545, |
|
"train_steps_per_second": 4.093 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 22.670251846313477, |
|
"learning_rate": 1.25e-06, |
|
"loss": 1.5369, |
|
"step": 5823 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 33.8125, |
|
"eval_f1": 48.71262499235238, |
|
"eval_runtime": 35.5251, |
|
"eval_samples_per_second": 112.793, |
|
"eval_steps_per_second": 4.053, |
|
"step": 5823 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6470, |
|
"train_exact_match": 63.73626373626374, |
|
"train_f1": 74.25641909537842, |
|
"train_runtime": 11.4055, |
|
"train_samples_per_second": 113.892, |
|
"train_steps_per_second": 4.121 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 26.434606552124023, |
|
"learning_rate": 0.0, |
|
"loss": 1.4813, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 33.78125, |
|
"eval_f1": 48.574128923315, |
|
"eval_runtime": 35.5406, |
|
"eval_samples_per_second": 112.744, |
|
"eval_steps_per_second": 4.052, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6470, |
|
"total_flos": 1.773811812584448e+16, |
|
"train_loss": 2.425102486234544, |
|
"train_runtime": 2362.6071, |
|
"train_samples_per_second": 76.619, |
|
"train_steps_per_second": 2.739 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6470, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.773811812584448e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|