|
{ |
|
"best_metric": 1.2180595397949219, |
|
"best_model_checkpoint": "./enko_mbartLarge_36p_exp1/checkpoint-25000", |
|
"epoch": 4.180990430177459, |
|
"eval_steps": 5000, |
|
"global_step": 45000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3583, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9844667432974e-05, |
|
"loss": 1.7529, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9689334865948e-05, |
|
"loss": 1.5715, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9534002298921995e-05, |
|
"loss": 1.535, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.937866973189599e-05, |
|
"loss": 1.5009, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9223337164869984e-05, |
|
"loss": 1.4783, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.906800459784399e-05, |
|
"loss": 1.4542, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8912672030817986e-05, |
|
"loss": 1.441, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.875733946379198e-05, |
|
"loss": 1.425, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.8602006896765975e-05, |
|
"loss": 1.4235, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 12.3168, |
|
"eval_gen_len": 14.6634, |
|
"eval_loss": 1.3893214464187622, |
|
"eval_runtime": 1225.507, |
|
"eval_samples_per_second": 17.629, |
|
"eval_steps_per_second": 1.102, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.844667432973998e-05, |
|
"loss": 1.3969, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.829134176271397e-05, |
|
"loss": 1.3901, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.813600919568797e-05, |
|
"loss": 1.3983, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.798067662866197e-05, |
|
"loss": 1.3744, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.782534406163596e-05, |
|
"loss": 1.3712, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.767001149460996e-05, |
|
"loss": 1.3604, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.751467892758396e-05, |
|
"loss": 1.3653, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.7359346360557956e-05, |
|
"loss": 1.3603, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.7204013793531954e-05, |
|
"loss": 1.3188, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.704868122650595e-05, |
|
"loss": 1.3281, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bleu": 14.3522, |
|
"eval_gen_len": 14.9186, |
|
"eval_loss": 1.2917265892028809, |
|
"eval_runtime": 1232.5734, |
|
"eval_samples_per_second": 17.528, |
|
"eval_steps_per_second": 1.096, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.689334865947995e-05, |
|
"loss": 1.3329, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.673801609245395e-05, |
|
"loss": 1.3462, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.6582683525427945e-05, |
|
"loss": 1.3051, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.6427350958401935e-05, |
|
"loss": 1.2672, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.627201839137594e-05, |
|
"loss": 1.2606, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.611668582434994e-05, |
|
"loss": 1.268, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.596135325732393e-05, |
|
"loss": 1.2733, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.5806020690297926e-05, |
|
"loss": 1.2652, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.565068812327193e-05, |
|
"loss": 1.2389, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.549535555624592e-05, |
|
"loss": 1.2506, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bleu": 14.3525, |
|
"eval_gen_len": 14.9494, |
|
"eval_loss": 1.2668566703796387, |
|
"eval_runtime": 1245.6055, |
|
"eval_samples_per_second": 17.345, |
|
"eval_steps_per_second": 1.085, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.534002298921992e-05, |
|
"loss": 1.2423, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.5184690422193924e-05, |
|
"loss": 1.2214, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.5029357855167915e-05, |
|
"loss": 1.1984, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.487402528814191e-05, |
|
"loss": 1.1922, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.471869272111591e-05, |
|
"loss": 1.1952, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.456336015408991e-05, |
|
"loss": 1.1998, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.4408027587063905e-05, |
|
"loss": 1.1676, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.42526950200379e-05, |
|
"loss": 1.175, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.40973624530119e-05, |
|
"loss": 1.1674, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.39420298859859e-05, |
|
"loss": 1.1603, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_bleu": 15.248, |
|
"eval_gen_len": 15.0062, |
|
"eval_loss": 1.2283018827438354, |
|
"eval_runtime": 1235.7777, |
|
"eval_samples_per_second": 17.483, |
|
"eval_steps_per_second": 1.093, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.3786697318959896e-05, |
|
"loss": 1.1944, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.3631364751933894e-05, |
|
"loss": 1.1644, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.347603218490789e-05, |
|
"loss": 1.137, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.332069961788189e-05, |
|
"loss": 1.1543, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.316536705085589e-05, |
|
"loss": 1.1359, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.301003448382988e-05, |
|
"loss": 1.1373, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.285470191680388e-05, |
|
"loss": 1.1229, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.269936934977788e-05, |
|
"loss": 1.1076, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.254403678275187e-05, |
|
"loss": 1.0849, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.2388704215725875e-05, |
|
"loss": 1.0765, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_bleu": 15.4063, |
|
"eval_gen_len": 14.7808, |
|
"eval_loss": 1.2180595397949219, |
|
"eval_runtime": 1212.0146, |
|
"eval_samples_per_second": 17.826, |
|
"eval_steps_per_second": 1.115, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.223337164869987e-05, |
|
"loss": 1.107, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.2078039081673864e-05, |
|
"loss": 1.1183, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.192270651464786e-05, |
|
"loss": 1.1004, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.1767373947621866e-05, |
|
"loss": 1.0793, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.161204138059586e-05, |
|
"loss": 1.0901, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.1456708813569854e-05, |
|
"loss": 1.1028, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 4.130137624654385e-05, |
|
"loss": 1.1398, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.114604367951785e-05, |
|
"loss": 1.088, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.099071111249185e-05, |
|
"loss": 1.0459, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.0835378545465845e-05, |
|
"loss": 1.1019, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_bleu": 14.3608, |
|
"eval_gen_len": 14.9014, |
|
"eval_loss": 1.2753331661224365, |
|
"eval_runtime": 1243.9853, |
|
"eval_samples_per_second": 17.368, |
|
"eval_steps_per_second": 1.086, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.068004597843984e-05, |
|
"loss": 1.0986, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.052471341141384e-05, |
|
"loss": 1.107, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.036938084438784e-05, |
|
"loss": 1.1234, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.021404827736183e-05, |
|
"loss": 1.138, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.0058715710335834e-05, |
|
"loss": 1.1106, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.990338314330983e-05, |
|
"loss": 1.0363, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.974805057628382e-05, |
|
"loss": 1.01, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.959271800925782e-05, |
|
"loss": 1.0107, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.9437385442231824e-05, |
|
"loss": 1.0122, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.9282052875205815e-05, |
|
"loss": 1.0504, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_bleu": 15.3253, |
|
"eval_gen_len": 14.7948, |
|
"eval_loss": 1.2334309816360474, |
|
"eval_runtime": 1248.5109, |
|
"eval_samples_per_second": 17.305, |
|
"eval_steps_per_second": 1.082, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.912672030817981e-05, |
|
"loss": 1.0407, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.897138774115382e-05, |
|
"loss": 1.0186, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.881605517412781e-05, |
|
"loss": 0.9753, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.8660722607101806e-05, |
|
"loss": 0.9714, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.8505390040075804e-05, |
|
"loss": 0.9734, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.83500574730498e-05, |
|
"loss": 0.963, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.81947249060238e-05, |
|
"loss": 1.0455, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.80393923389978e-05, |
|
"loss": 1.064, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.7884059771971794e-05, |
|
"loss": 1.005, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 3.772872720494579e-05, |
|
"loss": 0.9431, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_bleu": 15.2534, |
|
"eval_gen_len": 14.7293, |
|
"eval_loss": 1.2512198686599731, |
|
"eval_runtime": 1212.804, |
|
"eval_samples_per_second": 17.814, |
|
"eval_steps_per_second": 1.114, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.757339463791979e-05, |
|
"loss": 0.8958, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3.741806207089378e-05, |
|
"loss": 0.865, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.7262729503867785e-05, |
|
"loss": 0.8829, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.710739693684178e-05, |
|
"loss": 0.9097, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 3.6952064369815774e-05, |
|
"loss": 0.8159, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.679673180278977e-05, |
|
"loss": 0.8056, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.6641399235763776e-05, |
|
"loss": 0.7523, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.648606666873777e-05, |
|
"loss": 0.7307, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.6330734101711764e-05, |
|
"loss": 0.7998, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.617540153468577e-05, |
|
"loss": 0.8394, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_bleu": 14.9999, |
|
"eval_gen_len": 14.7993, |
|
"eval_loss": 1.2971030473709106, |
|
"eval_runtime": 1213.4816, |
|
"eval_samples_per_second": 17.804, |
|
"eval_steps_per_second": 1.113, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"step": 45000, |
|
"total_flos": 1.5605414779085128e+18, |
|
"train_loss": 1.1734986402723524, |
|
"train_runtime": 45995.7579, |
|
"train_samples_per_second": 56.158, |
|
"train_steps_per_second": 3.51 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 161445, |
|
"num_train_epochs": 15, |
|
"save_steps": 5000, |
|
"total_flos": 1.5605414779085128e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|