{
  "best_metric": 0.42429444193840027,
  "best_model_checkpoint": "t5/checkpoint-3921268",
  "epoch": 73.0,
  "global_step": 3921268,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 0.00099,
      "loss": 0.6596,
      "step": 53716
    },
    {
      "epoch": 1.0,
      "eval_bleu": 7.691952957568906,
      "eval_loss": 0.5863233804702759,
      "eval_runtime": 6528.3322,
      "eval_samples_per_second": 16.456,
      "eval_steps_per_second": 1.029,
      "step": 53716
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.00098,
      "loss": 0.5807,
      "step": 107432
    },
    {
      "epoch": 2.0,
      "eval_bleu": 7.596172987179689,
      "eval_loss": 0.5534030199050903,
      "eval_runtime": 6601.0537,
      "eval_samples_per_second": 16.275,
      "eval_steps_per_second": 1.017,
      "step": 107432
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0009699999999999999,
      "loss": 0.5569,
      "step": 161148
    },
    {
      "epoch": 3.0,
      "eval_bleu": 7.149818476141028,
      "eval_loss": 0.538519024848938,
      "eval_runtime": 6606.0069,
      "eval_samples_per_second": 16.263,
      "eval_steps_per_second": 1.016,
      "step": 161148
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00096,
      "loss": 0.5441,
      "step": 214864
    },
    {
      "epoch": 4.0,
      "eval_bleu": 7.493924944402864,
      "eval_loss": 0.5301510691642761,
      "eval_runtime": 6228.7775,
      "eval_samples_per_second": 17.248,
      "eval_steps_per_second": 1.078,
      "step": 214864
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.00095,
      "loss": 0.5349,
      "step": 268580
    },
    {
      "epoch": 5.0,
      "eval_bleu": 7.031863868448649,
      "eval_loss": 0.5224108695983887,
      "eval_runtime": 6611.0145,
      "eval_samples_per_second": 16.25,
      "eval_steps_per_second": 1.016,
      "step": 268580
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.00094,
      "loss": 0.5281,
      "step": 322296
    },
    {
      "epoch": 6.0,
      "eval_bleu": 7.644179348361122,
      "eval_loss": 0.5192911028862,
      "eval_runtime": 6621.1034,
      "eval_samples_per_second": 16.226,
      "eval_steps_per_second": 1.014,
      "step": 322296
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.00093,
      "loss": 0.5222,
      "step": 376012
    },
    {
      "epoch": 7.0,
      "eval_bleu": 7.607672700840728,
      "eval_loss": 0.5128632187843323,
      "eval_runtime": 6621.733,
      "eval_samples_per_second": 16.224,
      "eval_steps_per_second": 1.014,
      "step": 376012
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.00092,
      "loss": 0.5181,
      "step": 429728
    },
    {
      "epoch": 8.0,
      "eval_bleu": 6.071314840861525,
      "eval_loss": 0.5084598064422607,
      "eval_runtime": 6242.5238,
      "eval_samples_per_second": 17.21,
      "eval_steps_per_second": 1.076,
      "step": 429728
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.00091,
      "loss": 0.5137,
      "step": 483444
    },
    {
      "epoch": 9.0,
      "eval_bleu": 7.175821994303286,
      "eval_loss": 0.5051391124725342,
      "eval_runtime": 6238.6565,
      "eval_samples_per_second": 17.22,
      "eval_steps_per_second": 1.076,
      "step": 483444
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0009000000000000001,
      "loss": 0.5093,
      "step": 537160
    },
    {
      "epoch": 10.0,
      "eval_bleu": 7.716511125290912,
      "eval_loss": 0.5000638961791992,
      "eval_runtime": 6244.5271,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 537160
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.0008900000000000001,
      "loss": 0.5037,
      "step": 590876
    },
    {
      "epoch": 11.0,
      "eval_bleu": 7.072832342346184,
      "eval_loss": 0.4958619177341461,
      "eval_runtime": 6248.1767,
      "eval_samples_per_second": 17.194,
      "eval_steps_per_second": 1.075,
      "step": 590876
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.00088,
      "loss": 0.4992,
      "step": 644592
    },
    {
      "epoch": 12.0,
      "eval_bleu": 7.23951068440794,
      "eval_loss": 0.4918939471244812,
      "eval_runtime": 6240.6707,
      "eval_samples_per_second": 17.215,
      "eval_steps_per_second": 1.076,
      "step": 644592
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.00087,
      "loss": 0.4954,
      "step": 698308
    },
    {
      "epoch": 13.0,
      "eval_bleu": 7.381643836163121,
      "eval_loss": 0.4886699914932251,
      "eval_runtime": 6245.5311,
      "eval_samples_per_second": 17.201,
      "eval_steps_per_second": 1.075,
      "step": 698308
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.00086,
      "loss": 0.4915,
      "step": 752024
    },
    {
      "epoch": 14.0,
      "eval_bleu": 5.9857507598052075,
      "eval_loss": 0.4870322346687317,
      "eval_runtime": 6239.2065,
      "eval_samples_per_second": 17.219,
      "eval_steps_per_second": 1.076,
      "step": 752024
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.00085,
      "loss": 0.488,
      "step": 805740
    },
    {
      "epoch": 15.0,
      "eval_bleu": 7.5727246325090976,
      "eval_loss": 0.4828002154827118,
      "eval_runtime": 6235.1501,
      "eval_samples_per_second": 17.23,
      "eval_steps_per_second": 1.077,
      "step": 805740
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.00084,
      "loss": 0.4862,
      "step": 859456
    },
    {
      "epoch": 16.0,
      "eval_bleu": 7.59567809120864,
      "eval_loss": 0.4813084900379181,
      "eval_runtime": 6235.7161,
      "eval_samples_per_second": 17.228,
      "eval_steps_per_second": 1.077,
      "step": 859456
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.00083,
      "loss": 0.4827,
      "step": 913172
    },
    {
      "epoch": 17.0,
      "eval_bleu": 7.1431546130798385,
      "eval_loss": 0.4796863794326782,
      "eval_runtime": 6236.1656,
      "eval_samples_per_second": 17.227,
      "eval_steps_per_second": 1.077,
      "step": 913172
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.00082,
      "loss": 0.4798,
      "step": 966888
    },
    {
      "epoch": 18.0,
      "eval_bleu": 7.563058401551067,
      "eval_loss": 0.476810485124588,
      "eval_runtime": 6252.4371,
      "eval_samples_per_second": 17.182,
      "eval_steps_per_second": 1.074,
      "step": 966888
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.0008100000000000001,
      "loss": 0.4767,
      "step": 1020604
    },
    {
      "epoch": 19.0,
      "eval_bleu": 7.242193570088235,
      "eval_loss": 0.47421401739120483,
      "eval_runtime": 6234.3401,
      "eval_samples_per_second": 17.232,
      "eval_steps_per_second": 1.077,
      "step": 1020604
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.0008,
      "loss": 0.4748,
      "step": 1074320
    },
    {
      "epoch": 20.0,
      "eval_bleu": 6.120830355327935,
      "eval_loss": 0.47452765703201294,
      "eval_runtime": 6255.864,
      "eval_samples_per_second": 17.173,
      "eval_steps_per_second": 1.073,
      "step": 1074320
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.00079,
      "loss": 0.4735,
      "step": 1128036
    },
    {
      "epoch": 21.0,
      "eval_bleu": 7.641822854665483,
      "eval_loss": 0.4723513424396515,
      "eval_runtime": 6304.5815,
      "eval_samples_per_second": 17.04,
      "eval_steps_per_second": 1.065,
      "step": 1128036
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.0007800000000000001,
      "loss": 0.4716,
      "step": 1181752
    },
    {
      "epoch": 22.0,
      "eval_bleu": 7.3059537693760594,
      "eval_loss": 0.4718638062477112,
      "eval_runtime": 6317.7024,
      "eval_samples_per_second": 17.005,
      "eval_steps_per_second": 1.063,
      "step": 1181752
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.0007700000000000001,
      "loss": 0.469,
      "step": 1235468
    },
    {
      "epoch": 23.0,
      "eval_bleu": 7.598346638071266,
      "eval_loss": 0.46901023387908936,
      "eval_runtime": 6308.4041,
      "eval_samples_per_second": 17.03,
      "eval_steps_per_second": 1.064,
      "step": 1235468
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.00076,
      "loss": 0.4669,
      "step": 1289184
    },
    {
      "epoch": 24.0,
      "eval_bleu": 6.799435285671091,
      "eval_loss": 0.46680623292922974,
      "eval_runtime": 6315.2629,
      "eval_samples_per_second": 17.011,
      "eval_steps_per_second": 1.063,
      "step": 1289184
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.00075,
      "loss": 0.4641,
      "step": 1342900
    },
    {
      "epoch": 25.0,
      "eval_bleu": 7.62775725124654,
      "eval_loss": 0.4666709899902344,
      "eval_runtime": 6262.5243,
      "eval_samples_per_second": 17.155,
      "eval_steps_per_second": 1.072,
      "step": 1342900
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.00074,
      "loss": 0.4618,
      "step": 1396616
    },
    {
      "epoch": 26.0,
      "eval_bleu": 6.969418527447973,
      "eval_loss": 0.4641306698322296,
      "eval_runtime": 6239.5276,
      "eval_samples_per_second": 17.218,
      "eval_steps_per_second": 1.076,
      "step": 1396616
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.00073,
      "loss": 0.4606,
      "step": 1450332
    },
    {
      "epoch": 27.0,
      "eval_bleu": 7.458516781341554,
      "eval_loss": 0.4627404510974884,
      "eval_runtime": 6245.0374,
      "eval_samples_per_second": 17.203,
      "eval_steps_per_second": 1.075,
      "step": 1450332
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.0007199999999999999,
      "loss": 0.4582,
      "step": 1504048
    },
    {
      "epoch": 28.0,
      "eval_bleu": 6.694355343847021,
      "eval_loss": 0.46318283677101135,
      "eval_runtime": 6248.0853,
      "eval_samples_per_second": 17.194,
      "eval_steps_per_second": 1.075,
      "step": 1504048
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.00071,
      "loss": 0.4569,
      "step": 1557764
    },
    {
      "epoch": 29.0,
      "eval_bleu": 7.538791367466209,
      "eval_loss": 0.46087339520454407,
      "eval_runtime": 6241.0141,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 1.076,
      "step": 1557764
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.0007,
      "loss": 0.4548,
      "step": 1611480
    },
    {
      "epoch": 30.0,
      "eval_bleu": 7.528860869957395,
      "eval_loss": 0.4588477909564972,
      "eval_runtime": 6242.3068,
      "eval_samples_per_second": 17.21,
      "eval_steps_per_second": 1.076,
      "step": 1611480
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.00069,
      "loss": 0.4537,
      "step": 1665196
    },
    {
      "epoch": 31.0,
      "eval_bleu": 7.362216478280285,
      "eval_loss": 0.4597391188144684,
      "eval_runtime": 6244.4866,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 1665196
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.00068,
      "loss": 0.4513,
      "step": 1718912
    },
    {
      "epoch": 32.0,
      "eval_bleu": 7.137390175844847,
      "eval_loss": 0.4572164714336395,
      "eval_runtime": 6244.2148,
      "eval_samples_per_second": 17.205,
      "eval_steps_per_second": 1.075,
      "step": 1718912
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.00067,
      "loss": 0.4485,
      "step": 1772628
    },
    {
      "epoch": 33.0,
      "eval_bleu": 7.081305145228205,
      "eval_loss": 0.45658349990844727,
      "eval_runtime": 6241.0726,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 1.076,
      "step": 1772628
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.00066,
      "loss": 0.4469,
      "step": 1826344
    },
    {
      "epoch": 34.0,
      "eval_bleu": 7.065210289724078,
      "eval_loss": 0.4544486701488495,
      "eval_runtime": 6253.2099,
      "eval_samples_per_second": 17.18,
      "eval_steps_per_second": 1.074,
      "step": 1826344
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.0006500000000000001,
      "loss": 0.4449,
      "step": 1880060
    },
    {
      "epoch": 35.0,
      "eval_bleu": 7.378548531953654,
      "eval_loss": 0.4559008777141571,
      "eval_runtime": 6234.4769,
      "eval_samples_per_second": 17.232,
      "eval_steps_per_second": 1.077,
      "step": 1880060
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.00064,
      "loss": 0.4442,
      "step": 1933776
    },
    {
      "epoch": 36.0,
      "eval_bleu": 7.356901577029033,
      "eval_loss": 0.4534740746021271,
      "eval_runtime": 6249.755,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 1933776
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.00063,
      "loss": 0.4431,
      "step": 1987492
    },
    {
      "epoch": 37.0,
      "eval_bleu": 7.175291475992041,
      "eval_loss": 0.45327481627464294,
      "eval_runtime": 6249.6092,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 1987492
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.00062,
      "loss": 0.441,
      "step": 2041208
    },
    {
      "epoch": 38.0,
      "eval_bleu": 7.359022144163392,
      "eval_loss": 0.4524107277393341,
      "eval_runtime": 6240.8973,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 1.076,
      "step": 2041208
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.00061,
      "loss": 0.4387,
      "step": 2094924
    },
    {
      "epoch": 39.0,
      "eval_bleu": 7.5283460331563745,
      "eval_loss": 0.4496091306209564,
      "eval_runtime": 6237.5918,
      "eval_samples_per_second": 17.223,
      "eval_steps_per_second": 1.077,
      "step": 2094924
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.0006,
      "loss": 0.4359,
      "step": 2148640
    },
    {
      "epoch": 40.0,
      "eval_bleu": 7.5346208014087495,
      "eval_loss": 0.44786250591278076,
      "eval_runtime": 6243.0368,
      "eval_samples_per_second": 17.208,
      "eval_steps_per_second": 1.076,
      "step": 2148640
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.00059,
      "loss": 0.4338,
      "step": 2202356
    },
    {
      "epoch": 41.0,
      "eval_bleu": 7.406528761971476,
      "eval_loss": 0.44740021228790283,
      "eval_runtime": 6247.6804,
      "eval_samples_per_second": 17.195,
      "eval_steps_per_second": 1.075,
      "step": 2202356
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.00058,
      "loss": 0.4319,
      "step": 2256072
    },
    {
      "epoch": 42.0,
      "eval_bleu": 7.192159097527976,
      "eval_loss": 0.447433739900589,
      "eval_runtime": 6248.8648,
      "eval_samples_per_second": 17.192,
      "eval_steps_per_second": 1.075,
      "step": 2256072
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.00057,
      "loss": 0.43,
      "step": 2309788
    },
    {
      "epoch": 43.0,
      "eval_bleu": 7.325069602605064,
      "eval_loss": 0.4456492066383362,
      "eval_runtime": 6263.0673,
      "eval_samples_per_second": 17.153,
      "eval_steps_per_second": 1.072,
      "step": 2309788
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.0005600000000000001,
      "loss": 0.4279,
      "step": 2363504
    },
    {
      "epoch": 44.0,
      "eval_bleu": 7.532048814014251,
      "eval_loss": 0.4445250332355499,
      "eval_runtime": 6241.8298,
      "eval_samples_per_second": 17.211,
      "eval_steps_per_second": 1.076,
      "step": 2363504
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.00055,
      "loss": 0.426,
      "step": 2417220
    },
    {
      "epoch": 45.0,
      "eval_bleu": 7.174420155924515,
      "eval_loss": 0.44330111145973206,
      "eval_runtime": 6231.5445,
      "eval_samples_per_second": 17.24,
      "eval_steps_per_second": 1.078,
      "step": 2417220
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.00054,
      "loss": 0.4239,
      "step": 2470936
    },
    {
      "epoch": 46.0,
      "eval_bleu": 7.653281782827262,
      "eval_loss": 0.44130608439445496,
      "eval_runtime": 6244.5962,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 2470936
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.0005300000000000001,
      "loss": 0.422,
      "step": 2524652
    },
    {
      "epoch": 47.0,
      "eval_bleu": 7.358951072022719,
      "eval_loss": 0.4416486620903015,
      "eval_runtime": 6246.4215,
      "eval_samples_per_second": 17.199,
      "eval_steps_per_second": 1.075,
      "step": 2524652
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.0005200000000000001,
      "loss": 0.4206,
      "step": 2578368
    },
    {
      "epoch": 48.0,
      "eval_bleu": 6.700995294592222,
      "eval_loss": 0.441184937953949,
      "eval_runtime": 6248.9024,
      "eval_samples_per_second": 17.192,
      "eval_steps_per_second": 1.075,
      "step": 2578368
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.00051,
      "loss": 0.4186,
      "step": 2632084
    },
    {
      "epoch": 49.0,
      "eval_bleu": 7.428626778422992,
      "eval_loss": 0.44076189398765564,
      "eval_runtime": 6244.5819,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 2632084
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0005,
      "loss": 0.416,
      "step": 2685800
    },
    {
      "epoch": 50.0,
      "eval_bleu": 7.538386131386865,
      "eval_loss": 0.43902388215065,
      "eval_runtime": 6236.1942,
      "eval_samples_per_second": 17.227,
      "eval_steps_per_second": 1.077,
      "step": 2685800
    },
    {
      "epoch": 51.0,
      "learning_rate": 0.00049,
      "loss": 0.4145,
      "step": 2739516
    },
    {
      "epoch": 51.0,
      "eval_bleu": 7.177849858240658,
      "eval_loss": 0.4388711452484131,
      "eval_runtime": 6244.4397,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 2739516
    },
    {
      "epoch": 52.0,
      "learning_rate": 0.00048,
      "loss": 0.4115,
      "step": 2793232
    },
    {
      "epoch": 52.0,
      "eval_bleu": 7.3825806146338895,
      "eval_loss": 0.43703773617744446,
      "eval_runtime": 6246.8394,
      "eval_samples_per_second": 17.198,
      "eval_steps_per_second": 1.075,
      "step": 2793232
    },
    {
      "epoch": 53.0,
      "learning_rate": 0.00047,
      "loss": 0.4091,
      "step": 2846948
    },
    {
      "epoch": 53.0,
      "eval_bleu": 7.354983260965792,
      "eval_loss": 0.4351899325847626,
      "eval_runtime": 6249.7526,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 2846948
    },
    {
      "epoch": 54.0,
      "learning_rate": 0.00046,
      "loss": 0.4062,
      "step": 2900664
    },
    {
      "epoch": 54.0,
      "eval_bleu": 7.070865789057057,
      "eval_loss": 0.4349888265132904,
      "eval_runtime": 6255.1992,
      "eval_samples_per_second": 17.175,
      "eval_steps_per_second": 1.074,
      "step": 2900664
    },
    {
      "epoch": 55.0,
      "learning_rate": 0.00045000000000000004,
      "loss": 0.4038,
      "step": 2954380
    },
    {
      "epoch": 55.0,
      "eval_bleu": 7.724805289860729,
      "eval_loss": 0.4359044134616852,
      "eval_runtime": 6250.2651,
      "eval_samples_per_second": 17.188,
      "eval_steps_per_second": 1.074,
      "step": 2954380
    },
    {
      "epoch": 56.0,
      "learning_rate": 0.00044,
      "loss": 0.402,
      "step": 3008096
    },
    {
      "epoch": 56.0,
      "eval_bleu": 7.154898411407466,
      "eval_loss": 0.4326974153518677,
      "eval_runtime": 6264.3019,
      "eval_samples_per_second": 17.15,
      "eval_steps_per_second": 1.072,
      "step": 3008096
    },
    {
      "epoch": 57.0,
      "learning_rate": 0.00043,
      "loss": 0.3995,
      "step": 3061812
    },
    {
      "epoch": 57.0,
      "eval_bleu": 7.508317247767554,
      "eval_loss": 0.4333774149417877,
      "eval_runtime": 6245.5823,
      "eval_samples_per_second": 17.201,
      "eval_steps_per_second": 1.075,
      "step": 3061812
    },
    {
      "epoch": 58.0,
      "learning_rate": 0.00042,
      "loss": 0.3972,
      "step": 3115528
    },
    {
      "epoch": 58.0,
      "eval_bleu": 7.127695021274113,
      "eval_loss": 0.43104425072669983,
      "eval_runtime": 6238.945,
      "eval_samples_per_second": 17.219,
      "eval_steps_per_second": 1.076,
      "step": 3115528
    },
    {
      "epoch": 59.0,
      "learning_rate": 0.00041,
      "loss": 0.3942,
      "step": 3169244
    },
    {
      "epoch": 59.0,
      "eval_bleu": 6.749919689906369,
      "eval_loss": 0.4318625032901764,
      "eval_runtime": 6250.8033,
      "eval_samples_per_second": 17.187,
      "eval_steps_per_second": 1.074,
      "step": 3169244
    },
    {
      "epoch": 60.0,
      "learning_rate": 0.0004,
      "loss": 0.3921,
      "step": 3222960
    },
    {
      "epoch": 60.0,
      "eval_bleu": 7.099626120333918,
      "eval_loss": 0.4313414394855499,
      "eval_runtime": 6233.5496,
      "eval_samples_per_second": 17.234,
      "eval_steps_per_second": 1.077,
      "step": 3222960
    },
    {
      "epoch": 61.0,
      "learning_rate": 0.00039000000000000005,
      "loss": 0.3897,
      "step": 3276676
    },
    {
      "epoch": 61.0,
      "eval_bleu": 7.280842993868327,
      "eval_loss": 0.4297857880592346,
      "eval_runtime": 6255.1519,
      "eval_samples_per_second": 17.175,
      "eval_steps_per_second": 1.074,
      "step": 3276676
    },
    {
      "epoch": 62.0,
      "learning_rate": 0.00038,
      "loss": 0.3867,
      "step": 3330392
    },
    {
      "epoch": 62.0,
      "eval_bleu": 7.328384730172046,
      "eval_loss": 0.42802175879478455,
      "eval_runtime": 6245.3286,
      "eval_samples_per_second": 17.202,
      "eval_steps_per_second": 1.075,
      "step": 3330392
    },
    {
      "epoch": 63.0,
      "learning_rate": 0.00037,
      "loss": 0.3832,
      "step": 3384108
    },
    {
      "epoch": 63.0,
      "eval_bleu": 7.230903636346123,
      "eval_loss": 0.42855262756347656,
      "eval_runtime": 6240.2263,
      "eval_samples_per_second": 17.216,
      "eval_steps_per_second": 1.076,
      "step": 3384108
    },
    {
      "epoch": 64.0,
      "learning_rate": 0.00035999999999999997,
      "loss": 0.3807,
      "step": 3437824
    },
    {
      "epoch": 64.0,
      "eval_bleu": 7.557291062260919,
      "eval_loss": 0.4279802143573761,
      "eval_runtime": 6249.6778,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 3437824
    },
    {
      "epoch": 65.0,
      "learning_rate": 0.00035,
      "loss": 0.3779,
      "step": 3491540
    },
    {
      "epoch": 65.0,
      "eval_bleu": 7.456221414498501,
      "eval_loss": 0.42722874879837036,
      "eval_runtime": 6251.0806,
      "eval_samples_per_second": 17.186,
      "eval_steps_per_second": 1.074,
      "step": 3491540
    },
    {
      "epoch": 66.0,
      "learning_rate": 0.00034,
      "loss": 0.3746,
      "step": 3545256
    },
    {
      "epoch": 66.0,
      "eval_bleu": 7.38246671281172,
      "eval_loss": 0.4264260232448578,
      "eval_runtime": 6254.848,
      "eval_samples_per_second": 17.176,
      "eval_steps_per_second": 1.074,
      "step": 3545256
    },
    {
      "epoch": 67.0,
      "learning_rate": 0.00033,
      "loss": 0.3713,
      "step": 3598972
    },
    {
      "epoch": 67.0,
      "eval_bleu": 7.250094489059249,
      "eval_loss": 0.42612648010253906,
      "eval_runtime": 6253.4328,
      "eval_samples_per_second": 17.18,
      "eval_steps_per_second": 1.074,
      "step": 3598972
    },
    {
      "epoch": 68.0,
      "learning_rate": 0.00032,
      "loss": 0.3679,
      "step": 3652688
    },
    {
      "epoch": 68.0,
      "eval_bleu": 7.24260795309734,
      "eval_loss": 0.42605340480804443,
      "eval_runtime": 6249.7217,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 3652688
    },
    {
      "epoch": 69.0,
      "learning_rate": 0.00031,
      "loss": 0.3646,
      "step": 3706404
    },
    {
      "epoch": 69.0,
      "eval_bleu": 7.705633206021796,
      "eval_loss": 0.42531710863113403,
      "eval_runtime": 6255.4922,
      "eval_samples_per_second": 17.174,
      "eval_steps_per_second": 1.073,
      "step": 3706404
    },
    {
      "epoch": 70.0,
      "learning_rate": 0.0003,
      "loss": 0.3617,
      "step": 3760120
    },
    {
      "epoch": 70.0,
      "eval_bleu": 7.066333513511338,
      "eval_loss": 0.4245583415031433,
      "eval_runtime": 6242.6393,
      "eval_samples_per_second": 17.209,
      "eval_steps_per_second": 1.076,
      "step": 3760120
    },
    {
      "epoch": 71.0,
      "learning_rate": 0.00029,
      "loss": 0.3576,
      "step": 3813836
    },
    {
      "epoch": 71.0,
      "eval_bleu": 7.453191107022425,
      "eval_loss": 0.4248814582824707,
      "eval_runtime": 6236.6745,
      "eval_samples_per_second": 17.226,
      "eval_steps_per_second": 1.077,
      "step": 3813836
    },
    {
      "epoch": 72.0,
      "learning_rate": 0.00028000000000000003,
      "loss": 0.3538,
      "step": 3867552
    },
    {
      "epoch": 72.0,
      "eval_bleu": 7.399721264841341,
      "eval_loss": 0.42560645937919617,
      "eval_runtime": 6247.7185,
      "eval_samples_per_second": 17.195,
      "eval_steps_per_second": 1.075,
      "step": 3867552
    },
    {
      "epoch": 73.0,
      "learning_rate": 0.00027,
      "loss": 0.3498,
      "step": 3921268
    },
    {
      "epoch": 73.0,
      "eval_bleu": 7.23131596568943,
      "eval_loss": 0.42429444193840027,
      "eval_runtime": 6248.7182,
      "eval_samples_per_second": 17.192,
      "eval_steps_per_second": 1.075,
      "step": 3921268
    }
  ],
  "max_steps": 5371600,
  "num_train_epochs": 100,
  "total_flos": 8.225066520360465e+18,
  "trial_name": null,
  "trial_params": null
}