|
{ |
|
"best_metric": 14.074385643005371, |
|
"best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L1EXPO-ES-10/checkpoint-700", |
|
"epoch": 3.4057628719886632, |
|
"eval_steps": 50, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 3688.5065763773923, |
|
"learning_rate": 2.840909090909091e-08, |
|
"logits": -1.359458565711975, |
|
"logps": -84.69721221923828, |
|
"loss": 0.0051, |
|
"objective": 0.0046141319908201694, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.0046141319908201694, |
|
"step": 1, |
|
"wo_beta": 14.840873718261719 |
|
}, |
|
{ |
|
"dpo_loss": 2.3264636993408203, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 3322.6545378359765, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.454339623451233, |
|
"logps": -84.50347900390625, |
|
"loss": 4.2778, |
|
"objective": 4.120908737182617, |
|
"ranking_idealized": 0.5225340127944946, |
|
"ranking_idealized_expo": 0.5216836929321289, |
|
"ranking_simple": 0.521258533000946, |
|
"regularize": 4.120908737182617, |
|
"step": 50, |
|
"wo_beta": 15.655658721923828 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 2.8787100315093994, |
|
"eval_logits": -1.4301204681396484, |
|
"eval_logps": -91.78133392333984, |
|
"eval_loss": 5.651101589202881, |
|
"eval_objective": 5.578580379486084, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 5.578580379486084, |
|
"eval_runtime": 307.7497, |
|
"eval_samples_per_second": 18.814, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wo_beta": 16.107044219970703, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 8.834875106811523, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 2883.7985857251942, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.3840159177780151, |
|
"logps": -82.65471649169922, |
|
"loss": 17.3516, |
|
"objective": 17.624128341674805, |
|
"ranking_idealized": 0.5141666531562805, |
|
"ranking_idealized_expo": 0.5137500166893005, |
|
"ranking_simple": 0.5179166793823242, |
|
"regularize": 17.624128341674805, |
|
"step": 100, |
|
"wo_beta": 15.28693675994873 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 7.968704700469971, |
|
"eval_logits": -1.3171318769454956, |
|
"eval_logps": -86.66349792480469, |
|
"eval_loss": 15.683460235595703, |
|
"eval_objective": 15.754798889160156, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 15.754798889160156, |
|
"eval_runtime": 307.2479, |
|
"eval_samples_per_second": 18.845, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 15.626057624816895, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 14.751253128051758, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 2254.7228314416952, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.1572282314300537, |
|
"logps": -80.76160430908203, |
|
"loss": 28.6009, |
|
"objective": 28.620296478271484, |
|
"ranking_idealized": 0.5287500023841858, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.5266666412353516, |
|
"regularize": 28.620296478271484, |
|
"step": 150, |
|
"wo_beta": 15.1625394821167 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 15.000219345092773, |
|
"eval_logits": -1.1259195804595947, |
|
"eval_logps": -81.49861145019531, |
|
"eval_loss": 29.075258255004883, |
|
"eval_objective": 28.90445327758789, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 28.90445327758789, |
|
"eval_runtime": 307.0327, |
|
"eval_samples_per_second": 18.858, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wo_beta": 15.236913681030273, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 18.452308654785156, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 2255.157628060128, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -0.9300950169563293, |
|
"logps": -76.25523376464844, |
|
"loss": 35.0698, |
|
"objective": 35.79060745239258, |
|
"ranking_idealized": 0.51583331823349, |
|
"ranking_idealized_expo": 0.51541668176651, |
|
"ranking_simple": 0.5104166865348816, |
|
"regularize": 35.79060745239258, |
|
"step": 200, |
|
"wo_beta": 15.353928565979004 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 21.391788482666016, |
|
"eval_logits": -0.8775973916053772, |
|
"eval_logps": -82.15784454345703, |
|
"eval_loss": 41.12628173828125, |
|
"eval_objective": 40.45929718017578, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5124223828315735, |
|
"eval_regularize": 40.45929718017578, |
|
"eval_runtime": 307.0481, |
|
"eval_samples_per_second": 18.857, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wo_beta": 14.911209106445312, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 19.97781753540039, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 1862.9598435340467, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -0.7163826823234558, |
|
"logps": -77.4970932006836, |
|
"loss": 37.7822, |
|
"objective": 38.173789978027344, |
|
"ranking_idealized": 0.5166666507720947, |
|
"ranking_idealized_expo": 0.5162500143051147, |
|
"ranking_simple": 0.5112500190734863, |
|
"regularize": 38.173789978027344, |
|
"step": 250, |
|
"wo_beta": 15.578652381896973 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 21.928752899169922, |
|
"eval_logits": -0.641853392124176, |
|
"eval_logps": -83.0038833618164, |
|
"eval_loss": 44.07463836669922, |
|
"eval_objective": 43.393341064453125, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 43.393341064453125, |
|
"eval_runtime": 307.199, |
|
"eval_samples_per_second": 18.848, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 14.620430946350098, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 17.413480758666992, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 1744.6732754071961, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -0.40934881567955017, |
|
"logps": -79.10726165771484, |
|
"loss": 35.2811, |
|
"objective": 35.4559326171875, |
|
"ranking_idealized": 0.4962500035762787, |
|
"ranking_idealized_expo": 0.4950000047683716, |
|
"ranking_simple": 0.502916693687439, |
|
"regularize": 35.4559326171875, |
|
"step": 300, |
|
"wo_beta": 15.202095031738281 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 21.43065071105957, |
|
"eval_logits": -0.5315975546836853, |
|
"eval_logps": -83.84294891357422, |
|
"eval_loss": 43.6626091003418, |
|
"eval_objective": 43.46427536010742, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5320910811424255, |
|
"eval_regularize": 43.46427536010742, |
|
"eval_runtime": 307.206, |
|
"eval_samples_per_second": 18.847, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 14.544736862182617, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 17.524351119995117, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 1787.213275862853, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -0.5016722679138184, |
|
"logps": -80.09149169921875, |
|
"loss": 33.8034, |
|
"objective": 34.494503021240234, |
|
"ranking_idealized": 0.5262500047683716, |
|
"ranking_idealized_expo": 0.5254166722297668, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 34.494503021240234, |
|
"step": 350, |
|
"wo_beta": 15.207830429077148 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 23.330080032348633, |
|
"eval_logits": -0.593406081199646, |
|
"eval_logps": -84.05725860595703, |
|
"eval_loss": 45.264923095703125, |
|
"eval_objective": 45.35862731933594, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 45.35862731933594, |
|
"eval_runtime": 307.0631, |
|
"eval_samples_per_second": 18.856, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wo_beta": 14.60231876373291, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 16.205705642700195, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 1658.338167111395, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -0.5499605536460876, |
|
"logps": -80.1341552734375, |
|
"loss": 30.8702, |
|
"objective": 30.992847442626953, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.5320833325386047, |
|
"ranking_simple": 0.528333306312561, |
|
"regularize": 30.992847442626953, |
|
"step": 400, |
|
"wo_beta": 15.376312255859375 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 23.827035903930664, |
|
"eval_logits": -0.62712162733078, |
|
"eval_logps": -82.20217895507812, |
|
"eval_loss": 47.269775390625, |
|
"eval_objective": 47.26739501953125, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 47.26739501953125, |
|
"eval_runtime": 307.8491, |
|
"eval_samples_per_second": 18.808, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wo_beta": 14.336685180664062, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 14.983359336853027, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 1630.7914622079197, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.5104279518127441, |
|
"logps": -78.46994018554688, |
|
"loss": 29.5027, |
|
"objective": 29.416109085083008, |
|
"ranking_idealized": 0.5195833444595337, |
|
"ranking_idealized_expo": 0.5191666483879089, |
|
"ranking_simple": 0.5170833468437195, |
|
"regularize": 29.416109085083008, |
|
"step": 450, |
|
"wo_beta": 16.006542205810547 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 25.179445266723633, |
|
"eval_logits": -0.5507553815841675, |
|
"eval_logps": -82.72330474853516, |
|
"eval_loss": 49.341182708740234, |
|
"eval_objective": 49.47369384765625, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5201863646507263, |
|
"eval_regularize": 49.47369384765625, |
|
"eval_runtime": 307.2653, |
|
"eval_samples_per_second": 18.844, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 14.343340873718262, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 13.962078094482422, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 1627.1136853969401, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.4764183461666107, |
|
"logps": -78.08194732666016, |
|
"loss": 27.7693, |
|
"objective": 28.35871696472168, |
|
"ranking_idealized": 0.49791666865348816, |
|
"ranking_idealized_expo": 0.4970833361148834, |
|
"ranking_simple": 0.503333330154419, |
|
"regularize": 28.35871696472168, |
|
"step": 500, |
|
"wo_beta": 15.235273361206055 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 24.62739372253418, |
|
"eval_logits": -0.5208410024642944, |
|
"eval_logps": -83.14039611816406, |
|
"eval_loss": 48.41379928588867, |
|
"eval_objective": 48.561553955078125, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5181159377098083, |
|
"eval_regularize": 48.561553955078125, |
|
"eval_runtime": 313.5843, |
|
"eval_samples_per_second": 18.464, |
|
"eval_steps_per_second": 1.54, |
|
"eval_wo_beta": 14.325936317443848, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 14.243717193603516, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 1567.3979312158642, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.4176904857158661, |
|
"logps": -79.26414489746094, |
|
"loss": 26.3455, |
|
"objective": 27.205766677856445, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5104166865348816, |
|
"ranking_simple": 0.5066666603088379, |
|
"regularize": 27.205766677856445, |
|
"step": 550, |
|
"wo_beta": 15.118928909301758 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 24.8875732421875, |
|
"eval_logits": -0.5377052426338196, |
|
"eval_logps": -81.67108154296875, |
|
"eval_loss": 49.475399017333984, |
|
"eval_objective": 49.75130081176758, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 49.75130081176758, |
|
"eval_runtime": 307.1071, |
|
"eval_samples_per_second": 18.853, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wo_beta": 14.233548164367676, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 13.567865371704102, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 1510.6295336293697, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.4348069727420807, |
|
"logps": -78.36796569824219, |
|
"loss": 25.3777, |
|
"objective": 25.583778381347656, |
|
"ranking_idealized": 0.51541668176651, |
|
"ranking_idealized_expo": 0.5149999856948853, |
|
"ranking_simple": 0.5049999952316284, |
|
"regularize": 25.583778381347656, |
|
"step": 600, |
|
"wo_beta": 15.017353057861328 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 24.62792205810547, |
|
"eval_logits": -0.5633407235145569, |
|
"eval_logps": -81.369873046875, |
|
"eval_loss": 48.80782699584961, |
|
"eval_objective": 49.26447677612305, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 49.26447677612305, |
|
"eval_runtime": 307.6769, |
|
"eval_samples_per_second": 18.818, |
|
"eval_steps_per_second": 1.57, |
|
"eval_wo_beta": 14.197225570678711, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 12.823990821838379, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 1590.0809438470442, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.45912277698516846, |
|
"logps": -77.55583190917969, |
|
"loss": 24.4429, |
|
"objective": 24.74443817138672, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.527916669845581, |
|
"ranking_simple": 0.5270833373069763, |
|
"regularize": 24.74443817138672, |
|
"step": 650, |
|
"wo_beta": 15.796711921691895 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 25.341928482055664, |
|
"eval_logits": -0.475749671459198, |
|
"eval_logps": -81.65654754638672, |
|
"eval_loss": 49.71050262451172, |
|
"eval_objective": 49.81724548339844, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5191511511802673, |
|
"eval_regularize": 49.81724548339844, |
|
"eval_runtime": 318.0633, |
|
"eval_samples_per_second": 18.204, |
|
"eval_steps_per_second": 1.519, |
|
"eval_wo_beta": 14.336784362792969, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 11.803265571594238, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 1573.6320557673569, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.44002941250801086, |
|
"logps": -77.62532043457031, |
|
"loss": 22.5358, |
|
"objective": 22.4056339263916, |
|
"ranking_idealized": 0.5129166841506958, |
|
"ranking_idealized_expo": 0.5108333230018616, |
|
"ranking_simple": 0.5058333277702332, |
|
"regularize": 22.4056339263916, |
|
"step": 700, |
|
"wo_beta": 15.435830116271973 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 26.279430389404297, |
|
"eval_logits": -0.5139885544776917, |
|
"eval_logps": -80.61864471435547, |
|
"eval_loss": 51.679359436035156, |
|
"eval_objective": 51.56280517578125, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 51.56280517578125, |
|
"eval_runtime": 307.1755, |
|
"eval_samples_per_second": 18.849, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 14.074385643005371, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 10.530390739440918, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 1447.9001618253178, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.4438280165195465, |
|
"logps": -79.3443374633789, |
|
"loss": 20.6864, |
|
"objective": 20.564796447753906, |
|
"ranking_idealized": 0.5129166841506958, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 20.564796447753906, |
|
"step": 750, |
|
"wo_beta": 15.44257640838623 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 25.791982650756836, |
|
"eval_logits": -0.4510954022407532, |
|
"eval_logps": -83.94737243652344, |
|
"eval_loss": 50.90283966064453, |
|
"eval_objective": 51.139808654785156, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 51.139808654785156, |
|
"eval_runtime": 307.3519, |
|
"eval_samples_per_second": 18.838, |
|
"eval_steps_per_second": 1.571, |
|
"eval_wo_beta": 14.28470230102539, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 10.331942558288574, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 1416.622520151804, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.4105643630027771, |
|
"logps": -81.301513671875, |
|
"loss": 19.5881, |
|
"objective": 19.708881378173828, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5162500143051147, |
|
"regularize": 19.708881378173828, |
|
"step": 800, |
|
"wo_beta": 15.041363716125488 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 26.223230361938477, |
|
"eval_logits": -0.45186811685562134, |
|
"eval_logps": -84.14128112792969, |
|
"eval_loss": 51.44403076171875, |
|
"eval_objective": 51.835060119628906, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 51.835060119628906, |
|
"eval_runtime": 307.4841, |
|
"eval_samples_per_second": 18.83, |
|
"eval_steps_per_second": 1.571, |
|
"eval_wo_beta": 14.21197509765625, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 9.117318153381348, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 1511.1151822215572, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.4155246615409851, |
|
"logps": -80.53886413574219, |
|
"loss": 18.5246, |
|
"objective": 18.382122039794922, |
|
"ranking_idealized": 0.5145833492279053, |
|
"ranking_idealized_expo": 0.5133333206176758, |
|
"ranking_simple": 0.5141666531562805, |
|
"regularize": 18.382122039794922, |
|
"step": 850, |
|
"wo_beta": 15.248088836669922 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 26.526891708374023, |
|
"eval_logits": -0.5061497688293457, |
|
"eval_logps": -82.96385192871094, |
|
"eval_loss": 52.282501220703125, |
|
"eval_objective": 52.2313346862793, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 52.2313346862793, |
|
"eval_runtime": 307.2591, |
|
"eval_samples_per_second": 18.844, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 14.120504379272461, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 8.65651798248291, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 1500.724487309093, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.4497624337673187, |
|
"logps": -79.77722930908203, |
|
"loss": 17.4115, |
|
"objective": 17.32391929626465, |
|
"ranking_idealized": 0.518750011920929, |
|
"ranking_idealized_expo": 0.5183333158493042, |
|
"ranking_simple": 0.5179166793823242, |
|
"regularize": 17.32391929626465, |
|
"step": 900, |
|
"wo_beta": 15.50606918334961 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 26.54765510559082, |
|
"eval_logits": -0.5079280138015747, |
|
"eval_logps": -83.98892211914062, |
|
"eval_loss": 52.268577575683594, |
|
"eval_objective": 52.27949905395508, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5289855003356934, |
|
"eval_regularize": 52.27949905395508, |
|
"eval_runtime": 307.3895, |
|
"eval_samples_per_second": 18.836, |
|
"eval_steps_per_second": 1.571, |
|
"eval_wo_beta": 14.197465896606445, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 8.308319091796875, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 1453.978726592987, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.4583713412284851, |
|
"logps": -80.14180755615234, |
|
"loss": 16.2052, |
|
"objective": 16.429227828979492, |
|
"ranking_idealized": 0.5079166889190674, |
|
"ranking_idealized_expo": 0.5058333277702332, |
|
"ranking_simple": 0.5074999928474426, |
|
"regularize": 16.429227828979492, |
|
"step": 950, |
|
"wo_beta": 15.596735000610352 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 26.657089233398438, |
|
"eval_logits": -0.46912574768066406, |
|
"eval_logps": -83.12673950195312, |
|
"eval_loss": 52.40416717529297, |
|
"eval_objective": 52.389137268066406, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 52.389137268066406, |
|
"eval_runtime": 307.39, |
|
"eval_samples_per_second": 18.836, |
|
"eval_steps_per_second": 1.571, |
|
"eval_wo_beta": 14.298489570617676, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 7.868130683898926, |
|
"epoch": 2.838923004251299, |
|
"grad_norm": 1371.5890318912852, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.3988785743713379, |
|
"logps": -78.35469055175781, |
|
"loss": 15.0384, |
|
"objective": 15.024641990661621, |
|
"ranking_idealized": 0.5112500190734863, |
|
"ranking_idealized_expo": 0.5112500190734863, |
|
"ranking_simple": 0.5066666603088379, |
|
"regularize": 15.024641990661621, |
|
"step": 1000, |
|
"wo_beta": 15.029138565063477 |
|
}, |
|
{ |
|
"epoch": 2.838923004251299, |
|
"eval_dpo_loss": 26.16453742980957, |
|
"eval_logits": -0.4550507366657257, |
|
"eval_logps": -82.82769012451172, |
|
"eval_loss": 51.76364517211914, |
|
"eval_objective": 51.644718170166016, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5263975262641907, |
|
"eval_regularize": 51.644718170166016, |
|
"eval_runtime": 307.9823, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wo_beta": 14.203557968139648, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 7.561364650726318, |
|
"epoch": 2.9806329711856403, |
|
"grad_norm": 1438.5247466117469, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.39186450839042664, |
|
"logps": -79.17125701904297, |
|
"loss": 14.381, |
|
"objective": 14.444308280944824, |
|
"ranking_idealized": 0.5183333158493042, |
|
"ranking_idealized_expo": 0.5174999833106995, |
|
"ranking_simple": 0.5245833396911621, |
|
"regularize": 14.444308280944824, |
|
"step": 1050, |
|
"wo_beta": 15.485770225524902 |
|
}, |
|
{ |
|
"epoch": 2.9806329711856403, |
|
"eval_dpo_loss": 26.504281997680664, |
|
"eval_logits": -0.4121534526348114, |
|
"eval_logps": -83.05400848388672, |
|
"eval_loss": 51.82139587402344, |
|
"eval_objective": 51.90236282348633, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 51.90236282348633, |
|
"eval_runtime": 307.2005, |
|
"eval_samples_per_second": 18.848, |
|
"eval_steps_per_second": 1.572, |
|
"eval_wo_beta": 14.16685962677002, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 6.576974868774414, |
|
"epoch": 3.122342938119981, |
|
"grad_norm": 1479.1539218663233, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits": -0.37988409399986267, |
|
"logps": -80.17594146728516, |
|
"loss": 12.5437, |
|
"objective": 12.73067855834961, |
|
"ranking_idealized": 0.5299999713897705, |
|
"ranking_idealized_expo": 0.528333306312561, |
|
"ranking_simple": 0.5266666412353516, |
|
"regularize": 12.73067855834961, |
|
"step": 1100, |
|
"wo_beta": 15.62684440612793 |
|
}, |
|
{ |
|
"epoch": 3.122342938119981, |
|
"eval_dpo_loss": 26.185077667236328, |
|
"eval_logits": -0.4407959282398224, |
|
"eval_logps": -83.87307739257812, |
|
"eval_loss": 51.601688385009766, |
|
"eval_objective": 51.89978790283203, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 51.89978790283203, |
|
"eval_runtime": 308.2578, |
|
"eval_samples_per_second": 18.783, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wo_beta": 14.176854133605957, |
|
"step": 1100 |
|
}, |
|
{ |
|
"dpo_loss": 5.700263023376465, |
|
"epoch": 3.264052905054322, |
|
"grad_norm": 1402.4578249025758, |
|
"learning_rate": 1.617055052228768e-06, |
|
"logits": -0.39078637957572937, |
|
"logps": -80.27751159667969, |
|
"loss": 11.3828, |
|
"objective": 11.245396614074707, |
|
"ranking_idealized": 0.5091666579246521, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5104166865348816, |
|
"regularize": 11.245396614074707, |
|
"step": 1150, |
|
"wo_beta": 15.349074363708496 |
|
}, |
|
{ |
|
"epoch": 3.264052905054322, |
|
"eval_dpo_loss": 26.20229148864746, |
|
"eval_logits": -0.4506087601184845, |
|
"eval_logps": -84.2103500366211, |
|
"eval_loss": 51.586910247802734, |
|
"eval_objective": 51.72679138183594, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 51.72679138183594, |
|
"eval_runtime": 307.5329, |
|
"eval_samples_per_second": 18.827, |
|
"eval_steps_per_second": 1.571, |
|
"eval_wo_beta": 14.176774024963379, |
|
"step": 1150 |
|
}, |
|
{ |
|
"dpo_loss": 5.425318241119385, |
|
"epoch": 3.4057628719886632, |
|
"grad_norm": 1477.9539586967678, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits": -0.3910551071166992, |
|
"logps": -81.23528289794922, |
|
"loss": 10.5152, |
|
"objective": 10.480737686157227, |
|
"ranking_idealized": 0.5079166889190674, |
|
"ranking_idealized_expo": 0.5079166889190674, |
|
"ranking_simple": 0.5049999952316284, |
|
"regularize": 10.480737686157227, |
|
"step": 1200, |
|
"wo_beta": 15.531842231750488 |
|
}, |
|
{ |
|
"epoch": 3.4057628719886632, |
|
"eval_dpo_loss": 26.307344436645508, |
|
"eval_logits": -0.4568469524383545, |
|
"eval_logps": -84.14852905273438, |
|
"eval_loss": 51.58594512939453, |
|
"eval_objective": 51.662628173828125, |
|
"eval_ranking_idealized": 0.5212215185165405, |
|
"eval_ranking_idealized_expo": 0.5212215185165405, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 51.662628173828125, |
|
"eval_runtime": 307.0369, |
|
"eval_samples_per_second": 18.858, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wo_beta": 14.14501953125, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.4057628719886632, |
|
"step": 1200, |
|
"total_flos": 0.0, |
|
"train_loss": 2.660881093343099, |
|
"train_runtime": 6833.7834, |
|
"train_samples_per_second": 37.17, |
|
"train_steps_per_second": 0.258 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|