|
{ |
|
"best_metric": 0.9393565058708191, |
|
"best_model_checkpoint": "./checkpoint-220", |
|
"epoch": 5.930232558139535, |
|
"global_step": 220, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 6.7937, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 6.501975059509277, |
|
"eval_runtime": 33.9782, |
|
"eval_samples_per_second": 59.685, |
|
"eval_steps_per_second": 1.884, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001, |
|
"loss": 5.985, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 5.326801776885986, |
|
"eval_runtime": 18.802, |
|
"eval_samples_per_second": 107.861, |
|
"eval_steps_per_second": 3.404, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00015, |
|
"loss": 4.6798, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 3.829317331314087, |
|
"eval_runtime": 18.7742, |
|
"eval_samples_per_second": 108.021, |
|
"eval_steps_per_second": 3.409, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002, |
|
"loss": 3.0462, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.9157757759094238, |
|
"eval_runtime": 18.7907, |
|
"eval_samples_per_second": 107.926, |
|
"eval_steps_per_second": 3.406, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00025, |
|
"loss": 1.5197, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 1.2286747694015503, |
|
"eval_runtime": 18.812, |
|
"eval_samples_per_second": 107.804, |
|
"eval_steps_per_second": 3.402, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0003, |
|
"loss": 1.1349, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 1.1046648025512695, |
|
"eval_runtime": 18.8413, |
|
"eval_samples_per_second": 107.636, |
|
"eval_steps_per_second": 3.397, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00035, |
|
"loss": 1.0718, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 1.0654218196868896, |
|
"eval_runtime": 18.8231, |
|
"eval_samples_per_second": 107.74, |
|
"eval_steps_per_second": 3.4, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0004, |
|
"loss": 1.0987, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.0404733419418335, |
|
"eval_runtime": 18.8082, |
|
"eval_samples_per_second": 107.825, |
|
"eval_steps_per_second": 3.403, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 1.0133, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 1.0208371877670288, |
|
"eval_runtime": 18.8192, |
|
"eval_samples_per_second": 107.762, |
|
"eval_steps_per_second": 3.401, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0005, |
|
"loss": 0.9869, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 1.0047191381454468, |
|
"eval_runtime": 18.852, |
|
"eval_samples_per_second": 107.575, |
|
"eval_steps_per_second": 3.395, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00045901639344262296, |
|
"loss": 0.9809, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.9912181496620178, |
|
"eval_runtime": 18.8516, |
|
"eval_samples_per_second": 107.577, |
|
"eval_steps_per_second": 3.395, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0004180327868852459, |
|
"loss": 1.0275, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 0.9803428649902344, |
|
"eval_runtime": 18.8293, |
|
"eval_samples_per_second": 107.705, |
|
"eval_steps_per_second": 3.399, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0003770491803278688, |
|
"loss": 0.9608, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 0.9717100262641907, |
|
"eval_runtime": 18.8516, |
|
"eval_samples_per_second": 107.577, |
|
"eval_steps_per_second": 3.395, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0003360655737704918, |
|
"loss": 0.9431, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 0.9643934965133667, |
|
"eval_runtime": 18.8147, |
|
"eval_samples_per_second": 107.788, |
|
"eval_steps_per_second": 3.402, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.00029508196721311476, |
|
"loss": 0.991, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 0.9581753015518188, |
|
"eval_runtime": 18.8481, |
|
"eval_samples_per_second": 107.597, |
|
"eval_steps_per_second": 3.396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0002540983606557377, |
|
"loss": 0.9387, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_loss": 0.9531411528587341, |
|
"eval_runtime": 18.8022, |
|
"eval_samples_per_second": 107.86, |
|
"eval_steps_per_second": 3.404, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00021311475409836064, |
|
"loss": 0.9203, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 0.9489945769309998, |
|
"eval_runtime": 18.7924, |
|
"eval_samples_per_second": 107.916, |
|
"eval_steps_per_second": 3.406, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00017213114754098362, |
|
"loss": 0.9235, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 0.9456363320350647, |
|
"eval_runtime": 18.8084, |
|
"eval_samples_per_second": 107.824, |
|
"eval_steps_per_second": 3.403, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00013114754098360657, |
|
"loss": 0.9746, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.9429621696472168, |
|
"eval_runtime": 18.8357, |
|
"eval_samples_per_second": 107.668, |
|
"eval_steps_per_second": 3.398, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.016393442622952e-05, |
|
"loss": 0.9176, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 0.9410804510116577, |
|
"eval_runtime": 18.8338, |
|
"eval_samples_per_second": 107.679, |
|
"eval_steps_per_second": 3.398, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 4.9180327868852456e-05, |
|
"loss": 0.9175, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 0.9398788213729858, |
|
"eval_runtime": 18.8076, |
|
"eval_samples_per_second": 107.829, |
|
"eval_steps_per_second": 3.403, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 8.19672131147541e-06, |
|
"loss": 0.91, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 0.9393565058708191, |
|
"eval_runtime": 18.7996, |
|
"eval_samples_per_second": 107.874, |
|
"eval_steps_per_second": 3.404, |
|
"step": 220 |
|
} |
|
], |
|
"max_steps": 222, |
|
"num_train_epochs": 6, |
|
"total_flos": 8.954608582656e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|