{
  "best_metric": 1.410552978515625,
  "best_model_checkpoint": "./mbartLarge_mid_en-ko1/checkpoint-7500",
  "epoch": 10.1010101010101,
  "eval_steps": 1500,
  "global_step": 13500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.37,
      "learning_rate": 5e-05,
      "loss": 2.1372,
      "step": 500
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.998899601395987e-05,
      "loss": 1.7299,
      "step": 1000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.995599374285618e-05,
      "loss": 1.5855,
      "step": 1500
    },
    {
      "epoch": 1.12,
      "eval_bleu": 11.5186,
      "eval_gen_len": 16.204,
      "eval_loss": 1.5215433835983276,
      "eval_runtime": 165.3535,
      "eval_samples_per_second": 16.159,
      "eval_steps_per_second": 1.01,
      "step": 1500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.990102223921136e-05,
      "loss": 1.499,
      "step": 2000
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.9824129895478125e-05,
      "loss": 1.4854,
      "step": 2500
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.9725384401438634e-05,
      "loss": 1.4287,
      "step": 3000
    },
    {
      "epoch": 2.24,
      "eval_bleu": 12.2855,
      "eval_gen_len": 16.1497,
      "eval_loss": 1.4549453258514404,
      "eval_runtime": 165.6095,
      "eval_samples_per_second": 16.134,
      "eval_steps_per_second": 1.008,
      "step": 3000
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.960487268461591e-05,
      "loss": 1.38,
      "step": 3500
    },
    {
      "epoch": 2.99,
      "learning_rate": 4.946270083374993e-05,
      "loss": 1.385,
      "step": 4000
    },
    {
      "epoch": 3.37,
      "learning_rate": 4.929899400540567e-05,
      "loss": 1.2937,
      "step": 4500
    },
    {
      "epoch": 3.37,
      "eval_bleu": 12.6484,
      "eval_gen_len": 16.2152,
      "eval_loss": 1.4249523878097534,
      "eval_runtime": 169.9417,
      "eval_samples_per_second": 15.723,
      "eval_steps_per_second": 0.983,
      "step": 4500
    },
    {
      "epoch": 3.74,
      "learning_rate": 4.9113896313795424e-05,
      "loss": 1.3033,
      "step": 5000
    },
    {
      "epoch": 4.12,
      "learning_rate": 4.8907570703912376e-05,
      "loss": 1.2837,
      "step": 5500
    },
    {
      "epoch": 4.49,
      "learning_rate": 4.868019880808697e-05,
      "loss": 1.2444,
      "step": 6000
    },
    {
      "epoch": 4.49,
      "eval_bleu": 13.0063,
      "eval_gen_len": 16.0749,
      "eval_loss": 1.4164687395095825,
      "eval_runtime": 167.1035,
      "eval_samples_per_second": 15.99,
      "eval_steps_per_second": 0.999,
      "step": 6000
    },
    {
      "epoch": 4.86,
      "learning_rate": 4.843198078609264e-05,
      "loss": 1.2463,
      "step": 6500
    },
    {
      "epoch": 5.24,
      "learning_rate": 4.816313514894128e-05,
      "loss": 1.1724,
      "step": 7000
    },
    {
      "epoch": 5.61,
      "learning_rate": 4.787389856652394e-05,
      "loss": 1.1335,
      "step": 7500
    },
    {
      "epoch": 5.61,
      "eval_bleu": 13.2758,
      "eval_gen_len": 16.235,
      "eval_loss": 1.410552978515625,
      "eval_runtime": 168.3674,
      "eval_samples_per_second": 15.87,
      "eval_steps_per_second": 0.992,
      "step": 7500
    },
    {
      "epoch": 5.99,
      "learning_rate": 4.7564525659265865e-05,
      "loss": 1.1514,
      "step": 8000
    },
    {
      "epoch": 6.36,
      "learning_rate": 4.7235288773979234e-05,
      "loss": 1.0421,
      "step": 8500
    },
    {
      "epoch": 6.73,
      "learning_rate": 4.688647774411124e-05,
      "loss": 1.0508,
      "step": 9000
    },
    {
      "epoch": 6.73,
      "eval_bleu": 13.0601,
      "eval_gen_len": 15.86,
      "eval_loss": 1.4243229627609253,
      "eval_runtime": 163.5075,
      "eval_samples_per_second": 16.342,
      "eval_steps_per_second": 1.021,
      "step": 9000
    },
    {
      "epoch": 7.11,
      "learning_rate": 4.651839963459813e-05,
      "loss": 1.0106,
      "step": 9500
    },
    {
      "epoch": 7.48,
      "learning_rate": 4.6131378471550225e-05,
      "loss": 0.9055,
      "step": 10000
    },
    {
      "epoch": 7.86,
      "learning_rate": 4.572575495700553e-05,
      "loss": 0.9462,
      "step": 10500
    },
    {
      "epoch": 7.86,
      "eval_bleu": 13.0828,
      "eval_gen_len": 16.0475,
      "eval_loss": 1.4497138261795044,
      "eval_runtime": 157.7664,
      "eval_samples_per_second": 16.936,
      "eval_steps_per_second": 1.059,
      "step": 10500
    },
    {
      "epoch": 8.23,
      "learning_rate": 4.53018861690034e-05,
      "loss": 0.8717,
      "step": 11000
    },
    {
      "epoch": 8.6,
      "learning_rate": 4.48601452472419e-05,
      "loss": 0.8452,
      "step": 11500
    },
    {
      "epoch": 8.98,
      "learning_rate": 4.4400921064595953e-05,
      "loss": 0.8464,
      "step": 12000
    },
    {
      "epoch": 8.98,
      "eval_bleu": 13.5878,
      "eval_gen_len": 15.9308,
      "eval_loss": 1.4692068099975586,
      "eval_runtime": 157.7767,
      "eval_samples_per_second": 16.935,
      "eval_steps_per_second": 1.058,
      "step": 12000
    },
    {
      "epoch": 9.35,
      "learning_rate": 4.392461788478516e-05,
      "loss": 0.69,
      "step": 12500
    },
    {
      "epoch": 9.73,
      "learning_rate": 4.3431655006492856e-05,
      "loss": 0.6899,
      "step": 13000
    },
    {
      "epoch": 10.1,
      "learning_rate": 4.292246639424951e-05,
      "loss": 0.6995,
      "step": 13500
    },
    {
      "epoch": 10.1,
      "eval_bleu": 13.1085,
      "eval_gen_len": 15.9906,
      "eval_loss": 1.5572242736816406,
      "eval_runtime": 157.5254,
      "eval_samples_per_second": 16.962,
      "eval_steps_per_second": 1.06,
      "step": 13500
    },
    {
      "epoch": 10.1,
      "step": 13500,
      "total_flos": 4.683080748687688e+17,
      "train_loss": 1.1873034170645254,
      "train_runtime": 11819.1544,
      "train_samples_per_second": 72.361,
      "train_steps_per_second": 4.521
    }
  ],
  "logging_steps": 500,
  "max_steps": 53440,
  "num_train_epochs": 40,
  "save_steps": 1500,
  "total_flos": 4.683080748687688e+17,
  "trial_name": null,
  "trial_params": null
}