|
{ |
|
"best_metric": 0.7575966715812683, |
|
"best_model_checkpoint": "/home2/s5431786/jb-internship/results/Salesforce-codet5-small-CodeXGLUE-CONCODE-w_special_tokens/checkpoint-12000", |
|
"epoch": 4.16, |
|
"global_step": 13000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.160588611644274e-05, |
|
"loss": 2.7739, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 0.11002405260920435, |
|
"eval_exact_match": 0.136, |
|
"eval_loss": 0.9681498408317566, |
|
"eval_rouge1": 0.5324111683230927, |
|
"eval_rouge2": 0.3516111966646144, |
|
"eval_rougeL": 0.5130793573577244, |
|
"eval_runtime": 23.6738, |
|
"eval_samples_per_second": 84.482, |
|
"eval_steps_per_second": 2.661, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.35956493921945e-05, |
|
"loss": 0.8968, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bleu": 0.11661343796568427, |
|
"eval_exact_match": 0.148, |
|
"eval_loss": 0.8933660984039307, |
|
"eval_rouge1": 0.5490713398258988, |
|
"eval_rouge2": 0.3737481223472951, |
|
"eval_rougeL": 0.5313359382235461, |
|
"eval_runtime": 21.9288, |
|
"eval_samples_per_second": 91.204, |
|
"eval_steps_per_second": 2.873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.558541266794626e-05, |
|
"loss": 0.8139, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bleu": 0.11428222214709638, |
|
"eval_exact_match": 0.148, |
|
"eval_loss": 0.8664323687553406, |
|
"eval_rouge1": 0.5501019947483123, |
|
"eval_rouge2": 0.37606920276228095, |
|
"eval_rougeL": 0.5316473251371836, |
|
"eval_runtime": 21.9364, |
|
"eval_samples_per_second": 91.173, |
|
"eval_steps_per_second": 2.872, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.854818607471284e-05, |
|
"loss": 0.7535, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bleu": 0.12118583002054595, |
|
"eval_exact_match": 0.152, |
|
"eval_loss": 0.8371005058288574, |
|
"eval_rouge1": 0.5554072128917051, |
|
"eval_rouge2": 0.37994480723664975, |
|
"eval_rougeL": 0.5360569772635451, |
|
"eval_runtime": 21.9828, |
|
"eval_samples_per_second": 90.98, |
|
"eval_steps_per_second": 2.866, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.686394718226834e-05, |
|
"loss": 0.7267, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bleu": 0.1221326088002123, |
|
"eval_exact_match": 0.1555, |
|
"eval_loss": 0.8262679576873779, |
|
"eval_rouge1": 0.5599864774843732, |
|
"eval_rouge2": 0.3892847835247521, |
|
"eval_rougeL": 0.5434109433715582, |
|
"eval_runtime": 22.0553, |
|
"eval_samples_per_second": 90.681, |
|
"eval_steps_per_second": 2.856, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.517970828982384e-05, |
|
"loss": 0.7044, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 0.12264834126055617, |
|
"eval_exact_match": 0.161, |
|
"eval_loss": 0.8118048310279846, |
|
"eval_rouge1": 0.5584777889931278, |
|
"eval_rouge2": 0.38793170420432566, |
|
"eval_rougeL": 0.539366409663261, |
|
"eval_runtime": 21.8691, |
|
"eval_samples_per_second": 91.453, |
|
"eval_steps_per_second": 2.881, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.349546939737934e-05, |
|
"loss": 0.6505, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_bleu": 0.12561336503475642, |
|
"eval_exact_match": 0.1575, |
|
"eval_loss": 0.8004248142242432, |
|
"eval_rouge1": 0.5604501817439145, |
|
"eval_rouge2": 0.39030440934575783, |
|
"eval_rougeL": 0.5425107709271633, |
|
"eval_runtime": 21.9286, |
|
"eval_samples_per_second": 91.205, |
|
"eval_steps_per_second": 2.873, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 9.181123050493482e-05, |
|
"loss": 0.6301, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_bleu": 0.12658818034969338, |
|
"eval_exact_match": 0.1605, |
|
"eval_loss": 0.7999853491783142, |
|
"eval_rouge1": 0.5627685830925565, |
|
"eval_rouge2": 0.391269622424489, |
|
"eval_rougeL": 0.5451139508088971, |
|
"eval_runtime": 22.0195, |
|
"eval_samples_per_second": 90.828, |
|
"eval_steps_per_second": 2.861, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.012699161249032e-05, |
|
"loss": 0.6199, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_bleu": 0.12894396386527762, |
|
"eval_exact_match": 0.163, |
|
"eval_loss": 0.7873425483703613, |
|
"eval_rouge1": 0.5653377621170572, |
|
"eval_rouge2": 0.39748707891409585, |
|
"eval_rougeL": 0.5469501947752343, |
|
"eval_runtime": 21.8948, |
|
"eval_samples_per_second": 91.346, |
|
"eval_steps_per_second": 2.877, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.844275272004582e-05, |
|
"loss": 0.619, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_bleu": 0.13044433567409516, |
|
"eval_exact_match": 0.159, |
|
"eval_loss": 0.7792118191719055, |
|
"eval_rouge1": 0.5697500148728435, |
|
"eval_rouge2": 0.3972375515095064, |
|
"eval_rougeL": 0.5508657148123793, |
|
"eval_runtime": 22.0783, |
|
"eval_samples_per_second": 90.587, |
|
"eval_steps_per_second": 2.853, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.67585138276013e-05, |
|
"loss": 0.604, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_bleu": 0.12974945121652084, |
|
"eval_exact_match": 0.164, |
|
"eval_loss": 0.7772428393363953, |
|
"eval_rouge1": 0.5689486656450822, |
|
"eval_rouge2": 0.3977032244845097, |
|
"eval_rougeL": 0.5513327295716377, |
|
"eval_runtime": 21.7891, |
|
"eval_samples_per_second": 91.789, |
|
"eval_steps_per_second": 2.891, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.50742749351568e-05, |
|
"loss": 0.5911, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_bleu": 0.13125298952345468, |
|
"eval_exact_match": 0.162, |
|
"eval_loss": 0.7715250253677368, |
|
"eval_rouge1": 0.5661217549934918, |
|
"eval_rouge2": 0.3993895966921206, |
|
"eval_rougeL": 0.5492973852073642, |
|
"eval_runtime": 21.9641, |
|
"eval_samples_per_second": 91.058, |
|
"eval_steps_per_second": 2.868, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 8.33900360427123e-05, |
|
"loss": 0.5661, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_bleu": 0.13309212763109746, |
|
"eval_exact_match": 0.157, |
|
"eval_loss": 0.7687653303146362, |
|
"eval_rouge1": 0.5699981387398212, |
|
"eval_rouge2": 0.3973095013395418, |
|
"eval_rougeL": 0.551455131759035, |
|
"eval_runtime": 21.9964, |
|
"eval_samples_per_second": 90.924, |
|
"eval_steps_per_second": 2.864, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.17057971502678e-05, |
|
"loss": 0.5341, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_bleu": 0.13384219047455637, |
|
"eval_exact_match": 0.161, |
|
"eval_loss": 0.7724990248680115, |
|
"eval_rouge1": 0.5718059859082061, |
|
"eval_rouge2": 0.40004342552261796, |
|
"eval_rougeL": 0.5526613011742771, |
|
"eval_runtime": 21.9605, |
|
"eval_samples_per_second": 91.073, |
|
"eval_steps_per_second": 2.869, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 8.00215582578233e-05, |
|
"loss": 0.5368, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_bleu": 0.1330977205657701, |
|
"eval_exact_match": 0.161, |
|
"eval_loss": 0.7691414952278137, |
|
"eval_rouge1": 0.5709709775537742, |
|
"eval_rouge2": 0.40272100034764474, |
|
"eval_rougeL": 0.5533607655778048, |
|
"eval_runtime": 22.1482, |
|
"eval_samples_per_second": 90.301, |
|
"eval_steps_per_second": 2.844, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.83373193653788e-05, |
|
"loss": 0.5326, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_bleu": 0.13382817289236076, |
|
"eval_exact_match": 0.161, |
|
"eval_loss": 0.7643744945526123, |
|
"eval_rouge1": 0.5679296577675541, |
|
"eval_rouge2": 0.40079835184768015, |
|
"eval_rougeL": 0.5518721002250275, |
|
"eval_runtime": 21.8173, |
|
"eval_samples_per_second": 91.671, |
|
"eval_steps_per_second": 2.888, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 7.665308047293428e-05, |
|
"loss": 0.5296, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_bleu": 0.13382829744509098, |
|
"eval_exact_match": 0.161, |
|
"eval_loss": 0.7650749683380127, |
|
"eval_rouge1": 0.5692675142813927, |
|
"eval_rouge2": 0.4018288384287947, |
|
"eval_rougeL": 0.5521131597944772, |
|
"eval_runtime": 21.7709, |
|
"eval_samples_per_second": 91.866, |
|
"eval_steps_per_second": 2.894, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.496884158048978e-05, |
|
"loss": 0.5343, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_bleu": 0.1355027033693307, |
|
"eval_exact_match": 0.1635, |
|
"eval_loss": 0.7636184096336365, |
|
"eval_rouge1": 0.5679441721859034, |
|
"eval_rouge2": 0.4050592356179844, |
|
"eval_rougeL": 0.5514489474326643, |
|
"eval_runtime": 21.8929, |
|
"eval_samples_per_second": 91.354, |
|
"eval_steps_per_second": 2.878, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.328460268804527e-05, |
|
"loss": 0.5109, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_bleu": 0.13581137320800807, |
|
"eval_exact_match": 0.165, |
|
"eval_loss": 0.7763196229934692, |
|
"eval_rouge1": 0.5710735563830984, |
|
"eval_rouge2": 0.4076167525021497, |
|
"eval_rougeL": 0.5546392278395981, |
|
"eval_runtime": 21.9516, |
|
"eval_samples_per_second": 91.11, |
|
"eval_steps_per_second": 2.87, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.160036379560077e-05, |
|
"loss": 0.4847, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_bleu": 0.13710307705006092, |
|
"eval_exact_match": 0.1645, |
|
"eval_loss": 0.7670872807502747, |
|
"eval_rouge1": 0.5697703902827196, |
|
"eval_rouge2": 0.404941700282087, |
|
"eval_rougeL": 0.5520241279397228, |
|
"eval_runtime": 21.8083, |
|
"eval_samples_per_second": 91.708, |
|
"eval_steps_per_second": 2.889, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.991612490315627e-05, |
|
"loss": 0.4791, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_bleu": 0.1361107213351937, |
|
"eval_exact_match": 0.162, |
|
"eval_loss": 0.7662444114685059, |
|
"eval_rouge1": 0.5729589320755841, |
|
"eval_rouge2": 0.40808662503016657, |
|
"eval_rougeL": 0.555974205030025, |
|
"eval_runtime": 21.95, |
|
"eval_samples_per_second": 91.116, |
|
"eval_steps_per_second": 2.87, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 6.823188601071177e-05, |
|
"loss": 0.4765, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_bleu": 0.13494157409269927, |
|
"eval_exact_match": 0.163, |
|
"eval_loss": 0.7643869519233704, |
|
"eval_rouge1": 0.5739569530373543, |
|
"eval_rouge2": 0.40836724097565735, |
|
"eval_rougeL": 0.5571861839936938, |
|
"eval_runtime": 22.0521, |
|
"eval_samples_per_second": 90.694, |
|
"eval_steps_per_second": 2.857, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 6.654764711826725e-05, |
|
"loss": 0.4791, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_bleu": 0.1355371343991678, |
|
"eval_exact_match": 0.1665, |
|
"eval_loss": 0.7635162472724915, |
|
"eval_rouge1": 0.5745793842335204, |
|
"eval_rouge2": 0.41206862743691836, |
|
"eval_rougeL": 0.5587644331139192, |
|
"eval_runtime": 22.3535, |
|
"eval_samples_per_second": 89.472, |
|
"eval_steps_per_second": 2.818, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.486340822582275e-05, |
|
"loss": 0.4779, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_bleu": 0.1349476481247351, |
|
"eval_exact_match": 0.163, |
|
"eval_loss": 0.7575966715812683, |
|
"eval_rouge1": 0.5806974627802324, |
|
"eval_rouge2": 0.4141474945527226, |
|
"eval_rougeL": 0.5638708523820954, |
|
"eval_runtime": 24.2543, |
|
"eval_samples_per_second": 82.46, |
|
"eval_steps_per_second": 2.597, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.317916933337825e-05, |
|
"loss": 0.4723, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.13724470187361304, |
|
"eval_exact_match": 0.1605, |
|
"eval_loss": 0.7616467475891113, |
|
"eval_rouge1": 0.5756965634335589, |
|
"eval_rouge2": 0.4100511997403594, |
|
"eval_rougeL": 0.559460669435164, |
|
"eval_runtime": 23.0958, |
|
"eval_samples_per_second": 86.596, |
|
"eval_steps_per_second": 2.728, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 6.149493044093374e-05, |
|
"loss": 0.4422, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_bleu": 0.13537517858880133, |
|
"eval_exact_match": 0.163, |
|
"eval_loss": 0.7656739354133606, |
|
"eval_rouge1": 0.5705462095820004, |
|
"eval_rouge2": 0.40541664718394044, |
|
"eval_rougeL": 0.5548493921872111, |
|
"eval_runtime": 24.1852, |
|
"eval_samples_per_second": 82.695, |
|
"eval_steps_per_second": 2.605, |
|
"step": 13000 |
|
} |
|
], |
|
"max_steps": 31250, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.693192667037696e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|