|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.989690721649485, |
|
"eval_steps": 500, |
|
"global_step": 567, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9896907216494846, |
|
"grad_norm": 2.3543875217437744, |
|
"learning_rate": 9.5e-06, |
|
"loss": 3.0813, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.9896907216494846, |
|
"eval_gen_len": 247.969023, |
|
"eval_loss": 2.5624194145202637, |
|
"eval_rouge1": 0.365572, |
|
"eval_rouge2": 0.067814, |
|
"eval_rougeL": 0.174291, |
|
"eval_rougeLsum": 0.338231, |
|
"eval_runtime": 3587.8888, |
|
"eval_samples_per_second": 0.351, |
|
"eval_steps_per_second": 0.088, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.9896907216494846, |
|
"grad_norm": 1.331000566482544, |
|
"learning_rate": 9e-06, |
|
"loss": 2.6117, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.9896907216494846, |
|
"eval_gen_len": 254.757744, |
|
"eval_loss": 2.461825132369995, |
|
"eval_rouge1": 0.378265, |
|
"eval_rouge2": 0.077138, |
|
"eval_rougeL": 0.182503, |
|
"eval_rougeLsum": 0.351427, |
|
"eval_runtime": 3653.2761, |
|
"eval_samples_per_second": 0.345, |
|
"eval_steps_per_second": 0.086, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.9896907216494846, |
|
"grad_norm": 0.8951466083526611, |
|
"learning_rate": 8.5e-06, |
|
"loss": 2.5014, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.9896907216494846, |
|
"eval_gen_len": 254.992851, |
|
"eval_loss": 2.4188287258148193, |
|
"eval_rouge1": 0.374918, |
|
"eval_rouge2": 0.074194, |
|
"eval_rougeL": 0.179174, |
|
"eval_rougeLsum": 0.347582, |
|
"eval_runtime": 3488.9943, |
|
"eval_samples_per_second": 0.361, |
|
"eval_steps_per_second": 0.09, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.9896907216494846, |
|
"grad_norm": 0.8497774004936218, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.4322, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.9896907216494846, |
|
"eval_gen_len": 254.067514, |
|
"eval_loss": 2.3958144187927246, |
|
"eval_rouge1": 0.380259, |
|
"eval_rouge2": 0.078095, |
|
"eval_rougeL": 0.183458, |
|
"eval_rougeLsum": 0.353376, |
|
"eval_runtime": 3495.9619, |
|
"eval_samples_per_second": 0.36, |
|
"eval_steps_per_second": 0.09, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.989690721649485, |
|
"grad_norm": 0.8451509475708008, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.3836, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.989690721649485, |
|
"eval_gen_len": 254.285147, |
|
"eval_loss": 2.3810665607452393, |
|
"eval_rouge1": 0.382543, |
|
"eval_rouge2": 0.079173, |
|
"eval_rougeL": 0.183712, |
|
"eval_rougeLsum": 0.355298, |
|
"eval_runtime": 3613.6748, |
|
"eval_samples_per_second": 0.348, |
|
"eval_steps_per_second": 0.087, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.989690721649485, |
|
"grad_norm": 0.8150149583816528, |
|
"learning_rate": 7e-06, |
|
"loss": 2.3465, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 5.989690721649485, |
|
"eval_gen_len": 254.706116, |
|
"eval_loss": 2.3701136112213135, |
|
"eval_rouge1": 0.386206, |
|
"eval_rouge2": 0.081617, |
|
"eval_rougeL": 0.186495, |
|
"eval_rougeLsum": 0.359838, |
|
"eval_runtime": 3613.0416, |
|
"eval_samples_per_second": 0.348, |
|
"eval_steps_per_second": 0.087, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 6.989690721649485, |
|
"grad_norm": 0.8424471020698547, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 2.3149, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 6.989690721649485, |
|
"eval_gen_len": 254.68467, |
|
"eval_loss": 2.3630220890045166, |
|
"eval_rouge1": 0.388393, |
|
"eval_rouge2": 0.083801, |
|
"eval_rougeL": 0.187039, |
|
"eval_rougeLsum": 0.361264, |
|
"eval_runtime": 3555.8921, |
|
"eval_samples_per_second": 0.354, |
|
"eval_steps_per_second": 0.089, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 7.989690721649485, |
|
"grad_norm": 0.8140623569488525, |
|
"learning_rate": 6e-06, |
|
"loss": 2.2876, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 7.989690721649485, |
|
"eval_gen_len": 255.17077, |
|
"eval_loss": 2.356766939163208, |
|
"eval_rouge1": 0.385187, |
|
"eval_rouge2": 0.081047, |
|
"eval_rougeL": 0.185354, |
|
"eval_rougeLsum": 0.357705, |
|
"eval_runtime": 3499.45, |
|
"eval_samples_per_second": 0.36, |
|
"eval_steps_per_second": 0.09, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 8.989690721649485, |
|
"grad_norm": 0.846682071685791, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 2.2639, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.989690721649485, |
|
"eval_gen_len": 254.744241, |
|
"eval_loss": 2.3528521060943604, |
|
"eval_rouge1": 0.389675, |
|
"eval_rouge2": 0.083846, |
|
"eval_rougeL": 0.187545, |
|
"eval_rougeLsum": 0.361809, |
|
"eval_runtime": 3507.2655, |
|
"eval_samples_per_second": 0.359, |
|
"eval_steps_per_second": 0.09, |
|
"step": 567 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1260, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.5885960285782016e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|