|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 7320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.25e-05, |
|
"loss": 6.1167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.39470717462922345, |
|
"eval_loss": 3.535817861557007, |
|
"eval_runtime": 3.8077, |
|
"eval_samples_per_second": 1179.442, |
|
"eval_steps_per_second": 2.364, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.000125, |
|
"loss": 3.4801, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.43199785397566787, |
|
"eval_loss": 3.1201677322387695, |
|
"eval_runtime": 3.9073, |
|
"eval_samples_per_second": 1149.375, |
|
"eval_steps_per_second": 2.303, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001875, |
|
"loss": 3.1329, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00025, |
|
"loss": 2.9429, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.44977794609341165, |
|
"eval_loss": 2.915814161300659, |
|
"eval_runtime": 4.1629, |
|
"eval_samples_per_second": 1078.826, |
|
"eval_steps_per_second": 2.162, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.0003125, |
|
"loss": 2.8071, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.460516834193321, |
|
"eval_loss": 2.790581464767456, |
|
"eval_runtime": 4.0203, |
|
"eval_samples_per_second": 1117.085, |
|
"eval_steps_per_second": 2.239, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.000375, |
|
"loss": 2.7064, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00043750000000000006, |
|
"loss": 2.6197, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4701406312186929, |
|
"eval_loss": 2.6998250484466553, |
|
"eval_runtime": 4.0911, |
|
"eval_samples_per_second": 1097.737, |
|
"eval_steps_per_second": 2.2, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.0005, |
|
"loss": 2.5459, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4759738199057783, |
|
"eval_loss": 2.641901969909668, |
|
"eval_runtime": 4.078, |
|
"eval_samples_per_second": 1101.267, |
|
"eval_steps_per_second": 2.207, |
|
"step": 4392 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0005625000000000001, |
|
"loss": 2.5015, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.000625, |
|
"loss": 2.4492, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4802465824036524, |
|
"eval_loss": 2.6036274433135986, |
|
"eval_runtime": 4.1331, |
|
"eval_samples_per_second": 1086.595, |
|
"eval_steps_per_second": 2.178, |
|
"step": 5124 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.0006875, |
|
"loss": 2.4065, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4823891001600752, |
|
"eval_loss": 2.5769591331481934, |
|
"eval_runtime": 3.9166, |
|
"eval_samples_per_second": 1146.648, |
|
"eval_steps_per_second": 2.298, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.00075, |
|
"loss": 2.3867, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.0008125, |
|
"loss": 2.3626, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4863024386480748, |
|
"eval_loss": 2.5622453689575195, |
|
"eval_runtime": 3.9696, |
|
"eval_samples_per_second": 1131.334, |
|
"eval_steps_per_second": 2.267, |
|
"step": 6588 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.0008750000000000001, |
|
"loss": 2.3276, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4861165901356519, |
|
"eval_loss": 2.557232141494751, |
|
"eval_runtime": 4.198, |
|
"eval_samples_per_second": 1069.803, |
|
"eval_steps_per_second": 2.144, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 7320, |
|
"total_flos": 2273237316403200.0, |
|
"train_loss": 2.8881424867390284, |
|
"train_runtime": 618.3653, |
|
"train_samples_per_second": 757.562, |
|
"train_steps_per_second": 11.838 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7320, |
|
"num_train_epochs": 10, |
|
"save_steps": 2000, |
|
"total_flos": 2273237316403200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|