|
{ |
|
"best_metric": 0.37704405188560486, |
|
"best_model_checkpoint": "text_go3_aug\\checkpoint-600", |
|
"epoch": 12.834224598930481, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 6e-07, |
|
"loss": 0.2199, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.8704156479217604, |
|
"eval_loss": 0.3835061192512512, |
|
"eval_runtime": 13.2805, |
|
"eval_samples_per_second": 400.362, |
|
"eval_steps_per_second": 1.581, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.804305283757338e-07, |
|
"loss": 0.2115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.8726725597141245, |
|
"eval_loss": 0.37704405188560486, |
|
"eval_runtime": 5.9909, |
|
"eval_samples_per_second": 887.511, |
|
"eval_steps_per_second": 3.505, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 9.217221135029354e-07, |
|
"loss": 0.2059, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.8751175474891856, |
|
"eval_loss": 0.37850409746170044, |
|
"eval_runtime": 5.7996, |
|
"eval_samples_per_second": 916.787, |
|
"eval_steps_per_second": 3.621, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 8.63013698630137e-07, |
|
"loss": 0.2025, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_accuracy": 0.8751175474891856, |
|
"eval_loss": 0.3802962303161621, |
|
"eval_runtime": 5.6716, |
|
"eval_samples_per_second": 937.483, |
|
"eval_steps_per_second": 3.703, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 8.043052837573385e-07, |
|
"loss": 0.2035, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.8749294715064886, |
|
"eval_loss": 0.379294753074646, |
|
"eval_runtime": 5.6111, |
|
"eval_samples_per_second": 947.593, |
|
"eval_steps_per_second": 3.743, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 7.455968688845401e-07, |
|
"loss": 0.1956, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_accuracy": 0.8756817754372767, |
|
"eval_loss": 0.37750834226608276, |
|
"eval_runtime": 5.916, |
|
"eval_samples_per_second": 898.751, |
|
"eval_steps_per_second": 3.55, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 6.868884540117417e-07, |
|
"loss": 0.1979, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"eval_accuracy": 0.8769983073161557, |
|
"eval_loss": 0.37952980399131775, |
|
"eval_runtime": 5.8097, |
|
"eval_samples_per_second": 915.188, |
|
"eval_steps_per_second": 3.615, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 6.281800391389432e-07, |
|
"loss": 0.1939, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"eval_accuracy": 0.8768102313334587, |
|
"eval_loss": 0.37845009565353394, |
|
"eval_runtime": 5.7511, |
|
"eval_samples_per_second": 924.522, |
|
"eval_steps_per_second": 3.651, |
|
"step": 2400 |
|
} |
|
], |
|
"max_steps": 5610, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.8243640989395136e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|