{ "best_metric": 14.074385643005371, "best_model_checkpoint": "./qwen2.5-0.5b/qwen2.5-0.5b-expo-L1EXPO-ES-10/checkpoint-700", "epoch": 3.4057628719886632, "eval_steps": 50, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 3688.5065763773923, "learning_rate": 2.840909090909091e-08, "logits": -1.359458565711975, "logps": -84.69721221923828, "loss": 0.0051, "objective": 0.0046141319908201694, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.0046141319908201694, "step": 1, "wo_beta": 14.840873718261719 }, { "dpo_loss": 2.3264636993408203, "epoch": 0.14170996693434104, "grad_norm": 3322.6545378359765, "learning_rate": 1.4204545454545458e-06, "logits": -1.454339623451233, "logps": -84.50347900390625, "loss": 4.2778, "objective": 4.120908737182617, "ranking_idealized": 0.5225340127944946, "ranking_idealized_expo": 0.5216836929321289, "ranking_simple": 0.521258533000946, "regularize": 4.120908737182617, "step": 50, "wo_beta": 15.655658721923828 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 2.8787100315093994, "eval_logits": -1.4301204681396484, "eval_logps": -91.78133392333984, "eval_loss": 5.651101589202881, "eval_objective": 5.578580379486084, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 5.578580379486084, "eval_runtime": 307.7497, "eval_samples_per_second": 18.814, "eval_steps_per_second": 1.569, "eval_wo_beta": 16.107044219970703, "step": 50 }, { "dpo_loss": 8.834875106811523, "epoch": 0.2834199338686821, "grad_norm": 2883.7985857251942, "learning_rate": 2.8409090909090916e-06, "logits": -1.3840159177780151, "logps": -82.65471649169922, "loss": 17.3516, "objective": 17.624128341674805, "ranking_idealized": 0.5141666531562805, "ranking_idealized_expo": 0.5137500166893005, "ranking_simple": 0.5179166793823242, "regularize": 17.624128341674805, "step": 100, "wo_beta": 15.28693675994873 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 7.968704700469971, "eval_logits": -1.3171318769454956, "eval_logps": -86.66349792480469, "eval_loss": 15.683460235595703, "eval_objective": 15.754798889160156, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 15.754798889160156, "eval_runtime": 307.2479, "eval_samples_per_second": 18.845, "eval_steps_per_second": 1.572, "eval_wo_beta": 15.626057624816895, "step": 100 }, { "dpo_loss": 14.751253128051758, "epoch": 0.42512990080302315, "grad_norm": 2254.7228314416952, "learning_rate": 4.2613636363636365e-06, "logits": -1.1572282314300537, "logps": -80.76160430908203, "loss": 28.6009, "objective": 28.620296478271484, "ranking_idealized": 0.5287500023841858, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.5266666412353516, "regularize": 28.620296478271484, "step": 150, "wo_beta": 15.1625394821167 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 15.000219345092773, "eval_logits": -1.1259195804595947, "eval_logps": -81.49861145019531, "eval_loss": 29.075258255004883, "eval_objective": 28.90445327758789, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 28.90445327758789, "eval_runtime": 307.0327, "eval_samples_per_second": 18.858, "eval_steps_per_second": 1.573, "eval_wo_beta": 15.236913681030273, "step": 150 }, { "dpo_loss": 18.452308654785156, "epoch": 0.5668398677373642, "grad_norm": 2255.157628060128, "learning_rate": 4.997168347957521e-06, "logits": -0.9300950169563293, "logps": -76.25523376464844, "loss": 35.0698, "objective": 35.79060745239258, "ranking_idealized": 0.51583331823349, "ranking_idealized_expo": 0.51541668176651, "ranking_simple": 0.5104166865348816, "regularize": 35.79060745239258, "step": 200, "wo_beta": 15.353928565979004 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 21.391788482666016, "eval_logits": -0.8775973916053772, "eval_logps": -82.15784454345703, "eval_loss": 41.12628173828125, "eval_objective": 40.45929718017578, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5124223828315735, "eval_regularize": 40.45929718017578, "eval_runtime": 307.0481, "eval_samples_per_second": 18.857, "eval_steps_per_second": 1.573, "eval_wo_beta": 14.911209106445312, "step": 200 }, { "dpo_loss": 19.97781753540039, "epoch": 0.7085498346717053, "grad_norm": 1862.9598435340467, "learning_rate": 4.973122855144066e-06, "logits": -0.7163826823234558, "logps": -77.4970932006836, "loss": 37.7822, "objective": 38.173789978027344, "ranking_idealized": 0.5166666507720947, "ranking_idealized_expo": 0.5162500143051147, "ranking_simple": 0.5112500190734863, "regularize": 38.173789978027344, "step": 250, "wo_beta": 15.578652381896973 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 21.928752899169922, "eval_logits": -0.641853392124176, "eval_logps": -83.0038833618164, "eval_loss": 44.07463836669922, "eval_objective": 43.393341064453125, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 43.393341064453125, "eval_runtime": 307.199, "eval_samples_per_second": 18.848, "eval_steps_per_second": 1.572, "eval_wo_beta": 14.620430946350098, "step": 250 }, { "dpo_loss": 17.413480758666992, "epoch": 0.8502598016060463, "grad_norm": 1744.6732754071961, "learning_rate": 4.924776641419513e-06, "logits": -0.40934881567955017, "logps": -79.10726165771484, "loss": 35.2811, "objective": 35.4559326171875, "ranking_idealized": 0.4962500035762787, "ranking_idealized_expo": 0.4950000047683716, "ranking_simple": 0.502916693687439, "regularize": 35.4559326171875, "step": 300, "wo_beta": 15.202095031738281 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 21.43065071105957, "eval_logits": -0.5315975546836853, "eval_logps": -83.84294891357422, "eval_loss": 43.6626091003418, "eval_objective": 43.46427536010742, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5320910811424255, "eval_regularize": 43.46427536010742, "eval_runtime": 307.206, "eval_samples_per_second": 18.847, "eval_steps_per_second": 1.572, "eval_wo_beta": 14.544736862182617, "step": 300 }, { "dpo_loss": 17.524351119995117, "epoch": 0.9919697685403873, "grad_norm": 1787.213275862853, "learning_rate": 4.8526047530778175e-06, "logits": -0.5016722679138184, "logps": -80.09149169921875, "loss": 33.8034, "objective": 34.494503021240234, "ranking_idealized": 0.5262500047683716, "ranking_idealized_expo": 0.5254166722297668, "ranking_simple": 0.5249999761581421, "regularize": 34.494503021240234, "step": 350, "wo_beta": 15.207830429077148 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 23.330080032348633, "eval_logits": -0.593406081199646, "eval_logps": -84.05725860595703, "eval_loss": 45.264923095703125, "eval_objective": 45.35862731933594, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 45.35862731933594, "eval_runtime": 307.0631, "eval_samples_per_second": 18.856, "eval_steps_per_second": 1.573, "eval_wo_beta": 14.60231876373291, "step": 350 }, { "dpo_loss": 16.205705642700195, "epoch": 1.1336797354747283, "grad_norm": 1658.338167111395, "learning_rate": 4.757316345716554e-06, "logits": -0.5499605536460876, "logps": -80.1341552734375, "loss": 30.8702, "objective": 30.992847442626953, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.5320833325386047, "ranking_simple": 0.528333306312561, "regularize": 30.992847442626953, "step": 400, "wo_beta": 15.376312255859375 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 23.827035903930664, "eval_logits": -0.62712162733078, "eval_logps": -82.20217895507812, "eval_loss": 47.269775390625, "eval_objective": 47.26739501953125, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 47.26739501953125, "eval_runtime": 307.8491, "eval_samples_per_second": 18.808, "eval_steps_per_second": 1.569, "eval_wo_beta": 14.336685180664062, "step": 400 }, { "dpo_loss": 14.983359336853027, "epoch": 1.2753897024090695, "grad_norm": 1630.7914622079197, "learning_rate": 4.639847716126855e-06, "logits": -0.5104279518127441, "logps": -78.46994018554688, "loss": 29.5027, "objective": 29.416109085083008, "ranking_idealized": 0.5195833444595337, "ranking_idealized_expo": 0.5191666483879089, "ranking_simple": 0.5170833468437195, "regularize": 29.416109085083008, "step": 450, "wo_beta": 16.006542205810547 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 25.179445266723633, "eval_logits": -0.5507553815841675, "eval_logps": -82.72330474853516, "eval_loss": 49.341182708740234, "eval_objective": 49.47369384765625, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 49.47369384765625, "eval_runtime": 307.2653, "eval_samples_per_second": 18.844, "eval_steps_per_second": 1.572, "eval_wo_beta": 14.343340873718262, "step": 450 }, { "dpo_loss": 13.962078094482422, "epoch": 1.4170996693434104, "grad_norm": 1627.1136853969401, "learning_rate": 4.501353102310901e-06, "logits": -0.4764183461666107, "logps": -78.08194732666016, "loss": 27.7693, "objective": 28.35871696472168, "ranking_idealized": 0.49791666865348816, "ranking_idealized_expo": 0.4970833361148834, "ranking_simple": 0.503333330154419, "regularize": 28.35871696472168, "step": 500, "wo_beta": 15.235273361206055 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 24.62739372253418, "eval_logits": -0.5208410024642944, "eval_logps": -83.14039611816406, "eval_loss": 48.41379928588867, "eval_objective": 48.561553955078125, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5181159377098083, "eval_regularize": 48.561553955078125, "eval_runtime": 313.5843, "eval_samples_per_second": 18.464, "eval_steps_per_second": 1.54, "eval_wo_beta": 14.325936317443848, "step": 500 }, { "dpo_loss": 14.243717193603516, "epoch": 1.5588096362777515, "grad_norm": 1567.3979312158642, "learning_rate": 4.34319334202531e-06, "logits": -0.4176904857158661, "logps": -79.26414489746094, "loss": 26.3455, "objective": 27.205766677856445, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5104166865348816, "ranking_simple": 0.5066666603088379, "regularize": 27.205766677856445, "step": 550, "wo_beta": 15.118928909301758 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 24.8875732421875, "eval_logits": -0.5377052426338196, "eval_logps": -81.67108154296875, "eval_loss": 49.475399017333984, "eval_objective": 49.75130081176758, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 49.75130081176758, "eval_runtime": 307.1071, "eval_samples_per_second": 18.853, "eval_steps_per_second": 1.573, "eval_wo_beta": 14.233548164367676, "step": 550 }, { "dpo_loss": 13.567865371704102, "epoch": 1.7005196032120926, "grad_norm": 1510.6295336293697, "learning_rate": 4.16692250129073e-06, "logits": -0.4348069727420807, "logps": -78.36796569824219, "loss": 25.3777, "objective": 25.583778381347656, "ranking_idealized": 0.51541668176651, "ranking_idealized_expo": 0.5149999856948853, "ranking_simple": 0.5049999952316284, "regularize": 25.583778381347656, "step": 600, "wo_beta": 15.017353057861328 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 24.62792205810547, "eval_logits": -0.5633407235145569, "eval_logps": -81.369873046875, "eval_loss": 48.80782699584961, "eval_objective": 49.26447677612305, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 49.26447677612305, "eval_runtime": 307.6769, "eval_samples_per_second": 18.818, "eval_steps_per_second": 1.57, "eval_wo_beta": 14.197225570678711, "step": 600 }, { "dpo_loss": 12.823990821838379, "epoch": 1.8422295701464337, "grad_norm": 1590.0809438470442, "learning_rate": 3.974272604254906e-06, "logits": -0.45912277698516846, "logps": -77.55583190917969, "loss": 24.4429, "objective": 24.74443817138672, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.527916669845581, "ranking_simple": 0.5270833373069763, "regularize": 24.74443817138672, "step": 650, "wo_beta": 15.796711921691895 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 25.341928482055664, "eval_logits": -0.475749671459198, "eval_logps": -81.65654754638672, "eval_loss": 49.71050262451172, "eval_objective": 49.81724548339844, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 49.81724548339844, "eval_runtime": 318.0633, "eval_samples_per_second": 18.204, "eval_steps_per_second": 1.519, "eval_wo_beta": 14.336784362792969, "step": 650 }, { "dpo_loss": 11.803265571594238, "epoch": 1.9839395370807746, "grad_norm": 1573.6320557673569, "learning_rate": 3.767136614452458e-06, "logits": -0.44002941250801086, "logps": -77.62532043457031, "loss": 22.5358, "objective": 22.4056339263916, "ranking_idealized": 0.5129166841506958, "ranking_idealized_expo": 0.5108333230018616, "ranking_simple": 0.5058333277702332, "regularize": 22.4056339263916, "step": 700, "wo_beta": 15.435830116271973 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 26.279430389404297, "eval_logits": -0.5139885544776917, "eval_logps": -80.61864471435547, "eval_loss": 51.679359436035156, "eval_objective": 51.56280517578125, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 51.56280517578125, "eval_runtime": 307.1755, "eval_samples_per_second": 18.849, "eval_steps_per_second": 1.572, "eval_wo_beta": 14.074385643005371, "step": 700 }, { "dpo_loss": 10.530390739440918, "epoch": 2.1256495040151155, "grad_norm": 1447.9001618253178, "learning_rate": 3.547549834686222e-06, "logits": -0.4438280165195465, "logps": -79.3443374633789, "loss": 20.6864, "objective": 20.564796447753906, "ranking_idealized": 0.5129166841506958, "ranking_idealized_expo": 0.5112500190734863, "ranking_simple": 0.512499988079071, "regularize": 20.564796447753906, "step": 750, "wo_beta": 15.44257640838623 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 25.791982650756836, "eval_logits": -0.4510954022407532, "eval_logps": -83.94737243652344, "eval_loss": 50.90283966064453, "eval_objective": 51.139808654785156, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 51.139808654785156, "eval_runtime": 307.3519, "eval_samples_per_second": 18.838, "eval_steps_per_second": 1.571, "eval_wo_beta": 14.28470230102539, "step": 750 }, { "dpo_loss": 10.331942558288574, "epoch": 2.2673594709494567, "grad_norm": 1416.622520151804, "learning_rate": 3.3176699082935546e-06, "logits": -0.4105643630027771, "logps": -81.301513671875, "loss": 19.5881, "objective": 19.708881378173828, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5162500143051147, "regularize": 19.708881378173828, "step": 800, "wo_beta": 15.041363716125488 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 26.223230361938477, "eval_logits": -0.45186811685562134, "eval_logps": -84.14128112792969, "eval_loss": 51.44403076171875, "eval_objective": 51.835060119628906, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 51.835060119628906, "eval_runtime": 307.4841, "eval_samples_per_second": 18.83, "eval_steps_per_second": 1.571, "eval_wo_beta": 14.21197509765625, "step": 800 }, { "dpo_loss": 9.117318153381348, "epoch": 2.409069437883798, "grad_norm": 1511.1151822215572, "learning_rate": 3.0797556183036582e-06, "logits": -0.4155246615409851, "logps": -80.53886413574219, "loss": 18.5246, "objective": 18.382122039794922, "ranking_idealized": 0.5145833492279053, "ranking_idealized_expo": 0.5133333206176758, "ranking_simple": 0.5141666531562805, "regularize": 18.382122039794922, "step": 850, "wo_beta": 15.248088836669922 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 26.526891708374023, "eval_logits": -0.5061497688293457, "eval_logps": -82.96385192871094, "eval_loss": 52.282501220703125, "eval_objective": 52.2313346862793, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5284678936004639, "eval_regularize": 52.2313346862793, "eval_runtime": 307.2591, "eval_samples_per_second": 18.844, "eval_steps_per_second": 1.572, "eval_wo_beta": 14.120504379272461, "step": 850 }, { "dpo_loss": 8.65651798248291, "epoch": 2.550779404818139, "grad_norm": 1500.724487309093, "learning_rate": 2.8361446928038298e-06, "logits": -0.4497624337673187, "logps": -79.77722930908203, "loss": 17.4115, "objective": 17.32391929626465, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5183333158493042, "ranking_simple": 0.5179166793823242, "regularize": 17.32391929626465, "step": 900, "wo_beta": 15.50606918334961 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 26.54765510559082, "eval_logits": -0.5079280138015747, "eval_logps": -83.98892211914062, "eval_loss": 52.268577575683594, "eval_objective": 52.27949905395508, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5289855003356934, "eval_regularize": 52.27949905395508, "eval_runtime": 307.3895, "eval_samples_per_second": 18.836, "eval_steps_per_second": 1.571, "eval_wo_beta": 14.197465896606445, "step": 900 }, { "dpo_loss": 8.308319091796875, "epoch": 2.69248937175248, "grad_norm": 1453.978726592987, "learning_rate": 2.5892308345974517e-06, "logits": -0.4583713412284851, "logps": -80.14180755615234, "loss": 16.2052, "objective": 16.429227828979492, "ranking_idealized": 0.5079166889190674, "ranking_idealized_expo": 0.5058333277702332, "ranking_simple": 0.5074999928474426, "regularize": 16.429227828979492, "step": 950, "wo_beta": 15.596735000610352 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 26.657089233398438, "eval_logits": -0.46912574768066406, "eval_logps": -83.12673950195312, "eval_loss": 52.40416717529297, "eval_objective": 52.389137268066406, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 52.389137268066406, "eval_runtime": 307.39, "eval_samples_per_second": 18.836, "eval_steps_per_second": 1.571, "eval_wo_beta": 14.298489570617676, "step": 950 }, { "dpo_loss": 7.868130683898926, "epoch": 2.838923004251299, "grad_norm": 1371.5890318912852, "learning_rate": 2.341440200858589e-06, "logits": -0.3988785743713379, "logps": -78.35469055175781, "loss": 15.0384, "objective": 15.024641990661621, "ranking_idealized": 0.5112500190734863, "ranking_idealized_expo": 0.5112500190734863, "ranking_simple": 0.5066666603088379, "regularize": 15.024641990661621, "step": 1000, "wo_beta": 15.029138565063477 }, { "epoch": 2.838923004251299, "eval_dpo_loss": 26.16453742980957, "eval_logits": -0.4550507366657257, "eval_logps": -82.82769012451172, "eval_loss": 51.76364517211914, "eval_objective": 51.644718170166016, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 51.644718170166016, "eval_runtime": 307.9823, "eval_samples_per_second": 18.8, "eval_steps_per_second": 1.568, "eval_wo_beta": 14.203557968139648, "step": 1000 }, { "dpo_loss": 7.561364650726318, "epoch": 2.9806329711856403, "grad_norm": 1438.5247466117469, "learning_rate": 2.0952075638923656e-06, "logits": -0.39186450839042664, "logps": -79.17125701904297, "loss": 14.381, "objective": 14.444308280944824, "ranking_idealized": 0.5183333158493042, "ranking_idealized_expo": 0.5174999833106995, "ranking_simple": 0.5245833396911621, "regularize": 14.444308280944824, "step": 1050, "wo_beta": 15.485770225524902 }, { "epoch": 2.9806329711856403, "eval_dpo_loss": 26.504281997680664, "eval_logits": -0.4121534526348114, "eval_logps": -83.05400848388672, "eval_loss": 51.82139587402344, "eval_objective": 51.90236282348633, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 51.90236282348633, "eval_runtime": 307.2005, "eval_samples_per_second": 18.848, "eval_steps_per_second": 1.572, "eval_wo_beta": 14.16685962677002, "step": 1050 }, { "dpo_loss": 6.576974868774414, "epoch": 3.122342938119981, "grad_norm": 1479.1539218663233, "learning_rate": 1.852952387243698e-06, "logits": -0.37988409399986267, "logps": -80.17594146728516, "loss": 12.5437, "objective": 12.73067855834961, "ranking_idealized": 0.5299999713897705, "ranking_idealized_expo": 0.528333306312561, "ranking_simple": 0.5266666412353516, "regularize": 12.73067855834961, "step": 1100, "wo_beta": 15.62684440612793 }, { "epoch": 3.122342938119981, "eval_dpo_loss": 26.185077667236328, "eval_logits": -0.4407959282398224, "eval_logps": -83.87307739257812, "eval_loss": 51.601688385009766, "eval_objective": 51.89978790283203, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 51.89978790283203, "eval_runtime": 308.2578, "eval_samples_per_second": 18.783, "eval_steps_per_second": 1.567, "eval_wo_beta": 14.176854133605957, "step": 1100 }, { "dpo_loss": 5.700263023376465, "epoch": 3.264052905054322, "grad_norm": 1402.4578249025758, "learning_rate": 1.617055052228768e-06, "logits": -0.39078637957572937, "logps": -80.27751159667969, "loss": 11.3828, "objective": 11.245396614074707, "ranking_idealized": 0.5091666579246521, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5104166865348816, "regularize": 11.245396614074707, "step": 1150, "wo_beta": 15.349074363708496 }, { "epoch": 3.264052905054322, "eval_dpo_loss": 26.20229148864746, "eval_logits": -0.4506087601184845, "eval_logps": -84.2103500366211, "eval_loss": 51.586910247802734, "eval_objective": 51.72679138183594, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 51.72679138183594, "eval_runtime": 307.5329, "eval_samples_per_second": 18.827, "eval_steps_per_second": 1.571, "eval_wo_beta": 14.176774024963379, "step": 1150 }, { "dpo_loss": 5.425318241119385, "epoch": 3.4057628719886632, "grad_norm": 1477.9539586967678, "learning_rate": 1.3898334684855647e-06, "logits": -0.3910551071166992, "logps": -81.23528289794922, "loss": 10.5152, "objective": 10.480737686157227, "ranking_idealized": 0.5079166889190674, "ranking_idealized_expo": 0.5079166889190674, "ranking_simple": 0.5049999952316284, "regularize": 10.480737686157227, "step": 1200, "wo_beta": 15.531842231750488 }, { "epoch": 3.4057628719886632, "eval_dpo_loss": 26.307344436645508, "eval_logits": -0.4568469524383545, "eval_logps": -84.14852905273438, "eval_loss": 51.58594512939453, "eval_objective": 51.662628173828125, "eval_ranking_idealized": 0.5212215185165405, "eval_ranking_idealized_expo": 0.5212215185165405, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 51.662628173828125, "eval_runtime": 307.0369, "eval_samples_per_second": 18.858, "eval_steps_per_second": 1.573, "eval_wo_beta": 14.14501953125, "step": 1200 }, { "epoch": 3.4057628719886632, "step": 1200, "total_flos": 0.0, "train_loss": 2.660881093343099, "train_runtime": 6833.7834, "train_samples_per_second": 37.17, "train_steps_per_second": 0.258 } ], "logging_steps": 50, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }