|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 8260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.25e-05, |
|
"loss": 5.8796, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4610925470362255, |
|
"eval_loss": 3.108325958251953, |
|
"eval_runtime": 4.3048, |
|
"eval_samples_per_second": 1170.326, |
|
"eval_steps_per_second": 2.323, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.000125, |
|
"loss": 3.1165, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001875, |
|
"loss": 2.802, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.49653186960204804, |
|
"eval_loss": 2.7454917430877686, |
|
"eval_runtime": 4.0976, |
|
"eval_samples_per_second": 1229.489, |
|
"eval_steps_per_second": 2.44, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00025, |
|
"loss": 2.6268, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5115797107338558, |
|
"eval_loss": 2.573380947113037, |
|
"eval_runtime": 4.4435, |
|
"eval_samples_per_second": 1133.781, |
|
"eval_steps_per_second": 2.25, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0003125, |
|
"loss": 2.5157, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.000375, |
|
"loss": 2.4165, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5211369966209564, |
|
"eval_loss": 2.4666714668273926, |
|
"eval_runtime": 4.525, |
|
"eval_samples_per_second": 1113.377, |
|
"eval_steps_per_second": 2.21, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00043750000000000006, |
|
"loss": 2.3502, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2892, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5287937658050783, |
|
"eval_loss": 2.394850969314575, |
|
"eval_runtime": 4.6178, |
|
"eval_samples_per_second": 1090.991, |
|
"eval_steps_per_second": 2.166, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0005625000000000001, |
|
"loss": 2.2315, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5337701187510354, |
|
"eval_loss": 2.344557523727417, |
|
"eval_runtime": 4.6005, |
|
"eval_samples_per_second": 1095.088, |
|
"eval_steps_per_second": 2.174, |
|
"step": 4956 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.000625, |
|
"loss": 2.2096, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0006875, |
|
"loss": 2.1587, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5373570314429236, |
|
"eval_loss": 2.3208389282226562, |
|
"eval_runtime": 4.4883, |
|
"eval_samples_per_second": 1122.477, |
|
"eval_steps_per_second": 2.228, |
|
"step": 5782 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00075, |
|
"loss": 2.139, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.0008125, |
|
"loss": 2.1253, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5394279069622053, |
|
"eval_loss": 2.3043758869171143, |
|
"eval_runtime": 4.5793, |
|
"eval_samples_per_second": 1100.17, |
|
"eval_steps_per_second": 2.184, |
|
"step": 6608 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0008750000000000001, |
|
"loss": 2.0858, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5403687877641734, |
|
"eval_loss": 2.2939975261688232, |
|
"eval_runtime": 4.5389, |
|
"eval_samples_per_second": 1109.953, |
|
"eval_steps_per_second": 2.203, |
|
"step": 7434 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0009375, |
|
"loss": 2.0892, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 0.001, |
|
"loss": 2.0556, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5416644525230297, |
|
"eval_loss": 2.2877631187438965, |
|
"eval_runtime": 4.6002, |
|
"eval_samples_per_second": 1095.162, |
|
"eval_steps_per_second": 2.174, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 8260, |
|
"total_flos": 2562796651806720.0, |
|
"train_loss": 2.5528629182903297, |
|
"train_runtime": 747.7642, |
|
"train_samples_per_second": 706.265, |
|
"train_steps_per_second": 11.046 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8260, |
|
"num_train_epochs": 10, |
|
"save_steps": 2000, |
|
"total_flos": 2562796651806720.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|