|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 8260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.25e-05, |
|
"loss": 5.7733, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4626414056321562, |
|
"eval_loss": 3.1065430641174316, |
|
"eval_runtime": 4.0915, |
|
"eval_samples_per_second": 1231.319, |
|
"eval_steps_per_second": 2.444, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.000125, |
|
"loss": 3.1078, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001875, |
|
"loss": 2.7977, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.49604892580170235, |
|
"eval_loss": 2.750513792037964, |
|
"eval_runtime": 4.2814, |
|
"eval_samples_per_second": 1176.724, |
|
"eval_steps_per_second": 2.336, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00025, |
|
"loss": 2.6214, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5098120426490952, |
|
"eval_loss": 2.5736496448516846, |
|
"eval_runtime": 4.397, |
|
"eval_samples_per_second": 1145.772, |
|
"eval_steps_per_second": 2.274, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0003125, |
|
"loss": 2.513, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.000375, |
|
"loss": 2.4167, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5208181599372329, |
|
"eval_loss": 2.4625895023345947, |
|
"eval_runtime": 4.4219, |
|
"eval_samples_per_second": 1139.337, |
|
"eval_steps_per_second": 2.261, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00043750000000000006, |
|
"loss": 2.3476, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0005, |
|
"loss": 2.291, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5284624257219932, |
|
"eval_loss": 2.394136667251587, |
|
"eval_runtime": 4.4258, |
|
"eval_samples_per_second": 1138.337, |
|
"eval_steps_per_second": 2.26, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0005625000000000001, |
|
"loss": 2.2345, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5323009693260355, |
|
"eval_loss": 2.3510043621063232, |
|
"eval_runtime": 4.5875, |
|
"eval_samples_per_second": 1098.192, |
|
"eval_steps_per_second": 2.18, |
|
"step": 4956 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.000625, |
|
"loss": 2.2073, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0006875, |
|
"loss": 2.1641, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5357347153757428, |
|
"eval_loss": 2.32427716255188, |
|
"eval_runtime": 4.4956, |
|
"eval_samples_per_second": 1120.641, |
|
"eval_steps_per_second": 2.224, |
|
"step": 5782 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00075, |
|
"loss": 2.14, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.0008125, |
|
"loss": 2.1255, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5391278253775245, |
|
"eval_loss": 2.3039562702178955, |
|
"eval_runtime": 4.4937, |
|
"eval_samples_per_second": 1121.12, |
|
"eval_steps_per_second": 2.225, |
|
"step": 6608 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0008750000000000001, |
|
"loss": 2.0883, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5396185838024713, |
|
"eval_loss": 2.294677257537842, |
|
"eval_runtime": 4.5308, |
|
"eval_samples_per_second": 1111.95, |
|
"eval_steps_per_second": 2.207, |
|
"step": 7434 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0009375, |
|
"loss": 2.096, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 0.001, |
|
"loss": 2.0612, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5411096141763542, |
|
"eval_loss": 2.2898333072662354, |
|
"eval_runtime": 4.54, |
|
"eval_samples_per_second": 1109.693, |
|
"eval_steps_per_second": 2.203, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 8260, |
|
"total_flos": 2562796651806720.0, |
|
"train_loss": 2.546341962098498, |
|
"train_runtime": 742.1102, |
|
"train_samples_per_second": 711.646, |
|
"train_steps_per_second": 11.13 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8260, |
|
"num_train_epochs": 10, |
|
"save_steps": 2000, |
|
"total_flos": 2562796651806720.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|