{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.992914501653283, "eval_steps": 50, "global_step": 1056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 36.794102305076855, "learning_rate": 9.433962264150943e-09, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 0.0051, "objective": 0.0046141319908201694, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.0046141319908201694, "step": 1, "wo_beta": 14.841486930847168 }, { "dpo_loss": 0.6930367350578308, "epoch": 0.014170996693434105, "grad_norm": 51.56528279298989, "learning_rate": 4.7169811320754715e-08, "logits": -1.4291929006576538, "logps": -83.85256958007812, "loss": 0.0058, "objective": 0.005918528418987989, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4895833432674408, "ranking_simple": 0.4895833432674408, "regularize": 0.005918528418987989, "step": 5, "wo_beta": 16.667278289794922 }, { "dpo_loss": 0.6930564641952515, "epoch": 0.02834199338686821, "grad_norm": 43.62540826850091, "learning_rate": 9.433962264150943e-08, "logits": -1.4014313220977783, "logps": -84.90540313720703, "loss": 0.0065, "objective": 0.00607979716733098, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.00607979716733098, "step": 10, "wo_beta": 15.295255661010742 }, { "dpo_loss": 0.691772997379303, "epoch": 0.042512990080302314, "grad_norm": 40.579476886356176, "learning_rate": 1.4150943396226414e-07, "logits": -1.5395350456237793, "logps": -84.67674255371094, "loss": 0.0077, "objective": 0.007744006346911192, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.5666666626930237, "regularize": 0.007744006346911192, "step": 15, "wo_beta": 15.72358512878418 }, { "dpo_loss": 0.6908682584762573, "epoch": 0.05668398677373642, "grad_norm": 38.45055261776428, "learning_rate": 1.8867924528301886e-07, "logits": -1.3619084358215332, "logps": -83.87267303466797, "loss": 0.0106, "objective": 0.011018705554306507, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.011018705554306507, "step": 20, "wo_beta": 16.501863479614258 }, { "dpo_loss": 0.6917246580123901, "epoch": 0.07085498346717052, "grad_norm": 37.49075261903623, "learning_rate": 2.3584905660377358e-07, "logits": -1.366659164428711, "logps": -84.04557037353516, "loss": 0.0144, "objective": 0.012653553858399391, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5625, "regularize": 0.012653553858399391, "step": 25, "wo_beta": 15.649717330932617 }, { "dpo_loss": 0.6906312704086304, "epoch": 0.08502598016060463, "grad_norm": 35.42831042318107, "learning_rate": 2.830188679245283e-07, "logits": -1.4202715158462524, "logps": -84.00289154052734, "loss": 0.0156, "objective": 0.015595527365803719, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.015595527365803719, "step": 30, "wo_beta": 16.955543518066406 }, { "dpo_loss": 0.6931964755058289, "epoch": 0.09919697685403873, "grad_norm": 37.327321600930496, "learning_rate": 3.30188679245283e-07, "logits": -1.3935037851333618, "logps": -83.39187622070312, "loss": 0.0202, "objective": 0.021191226318478584, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.021191226318478584, "step": 35, "wo_beta": 16.169347763061523 }, { "dpo_loss": 0.693729817867279, "epoch": 0.11336797354747284, "grad_norm": 41.6880498675233, "learning_rate": 3.773584905660377e-07, "logits": -1.381697177886963, "logps": -83.91118621826172, "loss": 0.0228, "objective": 0.02042653225362301, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.02042653225362301, "step": 40, "wo_beta": 14.309080123901367 }, { "dpo_loss": 0.6919765472412109, "epoch": 0.12753897024090693, "grad_norm": 41.11048762433909, "learning_rate": 4.2452830188679244e-07, "logits": -1.3955552577972412, "logps": -84.25520324707031, "loss": 0.027, "objective": 0.025382202118635178, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 0.025382202118635178, "step": 45, "wo_beta": 14.21595287322998 }, { "dpo_loss": 0.6911224722862244, "epoch": 0.14170996693434104, "grad_norm": 41.07625280062658, "learning_rate": 4.7169811320754717e-07, "logits": -1.4127604961395264, "logps": -85.3918685913086, "loss": 0.0351, "objective": 0.03202561289072037, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03202561289072037, "step": 50, "wo_beta": 15.589811325073242 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6926834583282471, "eval_logits": -1.391736626625061, "eval_logps": -91.23294067382812, "eval_loss": 0.02213538996875286, "eval_objective": 0.022384027019143105, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.022384027019143105, "eval_runtime": 470.1076, "eval_samples_per_second": 12.316, "eval_steps_per_second": 1.027, "eval_wo_beta": 16.221710205078125, "step": 50 }, { "dpo_loss": 0.6922997832298279, "epoch": 0.15588096362777515, "grad_norm": 36.466581476765526, "learning_rate": 5.188679245283019e-07, "logits": -1.3620656728744507, "logps": -84.91451263427734, "loss": 0.0367, "objective": 0.0405682697892189, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.4833333194255829, "regularize": 0.0405682697892189, "step": 55, "wo_beta": 15.095004081726074 }, { "dpo_loss": 0.6875351071357727, "epoch": 0.17005196032120926, "grad_norm": 36.25782748515131, "learning_rate": 5.660377358490566e-07, "logits": -1.28928804397583, "logps": -85.71366119384766, "loss": 0.0403, "objective": 0.04035286232829094, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 0.04035286232829094, "step": 60, "wo_beta": 14.607115745544434 }, { "dpo_loss": 0.6947705149650574, "epoch": 0.18422295701464336, "grad_norm": 41.25867915272223, "learning_rate": 6.132075471698112e-07, "logits": -1.3798266649246216, "logps": -83.1692123413086, "loss": 0.0491, "objective": 0.050007414072752, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.050007414072752, "step": 65, "wo_beta": 14.976885795593262 }, { "dpo_loss": 0.6880966424942017, "epoch": 0.19839395370807747, "grad_norm": 35.20333705483616, "learning_rate": 6.60377358490566e-07, "logits": -1.4017753601074219, "logps": -85.73289489746094, "loss": 0.0551, "objective": 0.059768859297037125, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5249999761581421, "regularize": 0.059768859297037125, "step": 70, "wo_beta": 15.204180717468262 }, { "dpo_loss": 0.6949416995048523, "epoch": 0.21256495040151158, "grad_norm": 35.61853042350494, "learning_rate": 7.075471698113207e-07, "logits": -1.321311593055725, "logps": -85.34779357910156, "loss": 0.0579, "objective": 0.06061805784702301, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5291666388511658, "regularize": 0.06061805784702301, "step": 75, "wo_beta": 14.980683326721191 }, { "dpo_loss": 0.6930631995201111, "epoch": 0.22673594709494568, "grad_norm": 34.9536345678453, "learning_rate": 7.547169811320754e-07, "logits": -1.4264112710952759, "logps": -84.01344299316406, "loss": 0.0626, "objective": 0.062408361583948135, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5333333611488342, "regularize": 0.062408361583948135, "step": 80, "wo_beta": 16.357084274291992 }, { "dpo_loss": 0.6939026117324829, "epoch": 0.2409069437883798, "grad_norm": 35.4653089608865, "learning_rate": 8.018867924528302e-07, "logits": -1.4041804075241089, "logps": -83.52224731445312, "loss": 0.0695, "objective": 0.07861108332872391, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5416666865348816, "regularize": 0.07861108332872391, "step": 85, "wo_beta": 14.987756729125977 }, { "dpo_loss": 0.6886675357818604, "epoch": 0.25507794048181387, "grad_norm": 38.910010820592774, "learning_rate": 8.490566037735849e-07, "logits": -1.5007805824279785, "logps": -84.52466583251953, "loss": 0.0806, "objective": 0.08859896659851074, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5208333134651184, "regularize": 0.08859896659851074, "step": 90, "wo_beta": 15.482732772827148 }, { "dpo_loss": 0.6961393356323242, "epoch": 0.269248937175248, "grad_norm": 38.50762322649532, "learning_rate": 8.962264150943396e-07, "logits": -1.4152452945709229, "logps": -83.7827377319336, "loss": 0.0851, "objective": 0.08412078768014908, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.47083333134651184, "regularize": 0.08412078768014908, "step": 95, "wo_beta": 16.229019165039062 }, { "dpo_loss": 0.6928918361663818, "epoch": 0.2834199338686821, "grad_norm": 34.07886171444254, "learning_rate": 9.433962264150943e-07, "logits": -1.2942625284194946, "logps": -81.22164916992188, "loss": 0.0877, "objective": 0.08352937549352646, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4833333194255829, "regularize": 0.08352937549352646, "step": 100, "wo_beta": 15.187151908874512 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6921994090080261, "eval_logits": -1.3862521648406982, "eval_logps": -88.66019439697266, "eval_loss": 0.04334083944559097, "eval_objective": 0.04473063722252846, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.04473063722252846, "eval_runtime": 472.2375, "eval_samples_per_second": 12.261, "eval_steps_per_second": 1.023, "eval_wo_beta": 16.16818618774414, "step": 100 }, { "dpo_loss": 0.6989858150482178, "epoch": 0.2975909305621162, "grad_norm": 34.124768906394316, "learning_rate": 9.90566037735849e-07, "logits": -1.4883809089660645, "logps": -83.63202667236328, "loss": 0.0937, "objective": 0.10326550155878067, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.10326550155878067, "step": 105, "wo_beta": 14.697186470031738 }, { "dpo_loss": 0.6916998624801636, "epoch": 0.3117619272555503, "grad_norm": 36.53960499520599, "learning_rate": 9.99956257238817e-07, "logits": -1.3666936159133911, "logps": -82.67723083496094, "loss": 0.1009, "objective": 0.09831760078668594, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.09831760078668594, "step": 110, "wo_beta": 14.75289249420166 }, { "dpo_loss": 0.6972029805183411, "epoch": 0.32593292394898443, "grad_norm": 32.3431868996238, "learning_rate": 9.997785653888834e-07, "logits": -1.351915955543518, "logps": -82.5732650756836, "loss": 0.1062, "objective": 0.10171337425708771, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5083333253860474, "regularize": 0.10171337425708771, "step": 115, "wo_beta": 16.003950119018555 }, { "dpo_loss": 0.6885399222373962, "epoch": 0.3401039206424185, "grad_norm": 35.92878266852989, "learning_rate": 9.994642390694308e-07, "logits": -1.367909550666809, "logps": -82.90719604492188, "loss": 0.1098, "objective": 0.11067435145378113, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.11067435145378113, "step": 120, "wo_beta": 15.639138221740723 }, { "dpo_loss": 0.6936843395233154, "epoch": 0.35427491733585265, "grad_norm": 30.26276247254467, "learning_rate": 9.990133642141357e-07, "logits": -1.3929860591888428, "logps": -85.65290069580078, "loss": 0.1056, "objective": 0.11743973940610886, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4749999940395355, "regularize": 0.11743973940610886, "step": 125, "wo_beta": 15.93514633178711 }, { "dpo_loss": 0.6941003799438477, "epoch": 0.3684459140292867, "grad_norm": 39.21461417787312, "learning_rate": 9.98426064087682e-07, "logits": -1.3525993824005127, "logps": -83.56419372558594, "loss": 0.1211, "objective": 0.11899420619010925, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.5833333134651184, "regularize": 0.11899420619010925, "step": 130, "wo_beta": 16.0157527923584 }, { "dpo_loss": 0.6882577538490295, "epoch": 0.3826169107227208, "grad_norm": 32.67768184928008, "learning_rate": 9.977024992520601e-07, "logits": -1.3901729583740234, "logps": -84.39146423339844, "loss": 0.1253, "objective": 0.12414517998695374, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.12414517998695374, "step": 135, "wo_beta": 14.371219635009766 }, { "dpo_loss": 0.6830641627311707, "epoch": 0.39678790741615494, "grad_norm": 33.07732649314307, "learning_rate": 9.968428675226713e-07, "logits": -1.3437649011611938, "logps": -85.44697570800781, "loss": 0.1248, "objective": 0.12058641016483307, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5375000238418579, "regularize": 0.12058641016483307, "step": 140, "wo_beta": 14.547070503234863 }, { "dpo_loss": 0.6851420998573303, "epoch": 0.410958904109589, "grad_norm": 30.784646211601874, "learning_rate": 9.958474039142469e-07, "logits": -1.3567951917648315, "logps": -86.4469223022461, "loss": 0.1319, "objective": 0.13056445121765137, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5625, "regularize": 0.13056445121765137, "step": 145, "wo_beta": 13.91884994506836 }, { "dpo_loss": 0.6960374116897583, "epoch": 0.42512990080302315, "grad_norm": 32.05337681597037, "learning_rate": 9.947163805765979e-07, "logits": -1.3565360307693481, "logps": -86.30919647216797, "loss": 0.1323, "objective": 0.12925057113170624, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5375000238418579, "regularize": 0.12925057113170624, "step": 150, "wo_beta": 16.796695709228516 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.695567786693573, "eval_logits": -1.3053797483444214, "eval_logps": -90.43773651123047, "eval_loss": 0.07677316665649414, "eval_objective": 0.07639209181070328, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.07639209181070328, "eval_runtime": 526.1958, "eval_samples_per_second": 11.004, "eval_steps_per_second": 0.918, "eval_wo_beta": 16.003387451171875, "step": 150 }, { "dpo_loss": 0.6933045983314514, "epoch": 0.43930089749645723, "grad_norm": 31.605620123374155, "learning_rate": 9.934501067202117e-07, "logits": -1.3933676481246948, "logps": -83.03238677978516, "loss": 0.1358, "objective": 0.1285211592912674, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4791666567325592, "regularize": 0.1285211592912674, "step": 155, "wo_beta": 15.31113338470459 }, { "dpo_loss": 0.6946766972541809, "epoch": 0.45347189418989137, "grad_norm": 32.22880904067845, "learning_rate": 9.92048928531717e-07, "logits": -1.2931861877441406, "logps": -83.0308837890625, "loss": 0.1338, "objective": 0.12377996742725372, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5416666865348816, "regularize": 0.12377996742725372, "step": 160, "wo_beta": 14.51412296295166 }, { "dpo_loss": 0.6858457326889038, "epoch": 0.46764289088332545, "grad_norm": 28.56289647538006, "learning_rate": 9.905132290792392e-07, "logits": -1.3845534324645996, "logps": -84.35334777832031, "loss": 0.1295, "objective": 0.13048619031906128, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.13048619031906128, "step": 165, "wo_beta": 15.858311653137207 }, { "dpo_loss": 0.6987485289573669, "epoch": 0.4818138875767596, "grad_norm": 31.697158183348822, "learning_rate": 9.888434282076757e-07, "logits": -1.3974741697311401, "logps": -82.40156555175781, "loss": 0.1376, "objective": 0.14300216734409332, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 0.14300216734409332, "step": 170, "wo_beta": 15.730175018310547 }, { "dpo_loss": 0.6993536353111267, "epoch": 0.49598488427019366, "grad_norm": 30.951333756278135, "learning_rate": 9.870399824239114e-07, "logits": -1.2470077276229858, "logps": -83.35051727294922, "loss": 0.1401, "objective": 0.13475559651851654, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 0.13475559651851654, "step": 175, "wo_beta": 17.82953643798828 }, { "dpo_loss": 0.6983634233474731, "epoch": 0.5101558809636277, "grad_norm": 34.822921079044, "learning_rate": 9.851033847720164e-07, "logits": -1.2282413244247437, "logps": -83.51294708251953, "loss": 0.1442, "objective": 0.143393412232399, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4625000059604645, "regularize": 0.143393412232399, "step": 180, "wo_beta": 14.920221328735352 }, { "dpo_loss": 0.6972795128822327, "epoch": 0.5243268776570619, "grad_norm": 34.3447207787113, "learning_rate": 9.83034164698452e-07, "logits": -1.2574915885925293, "logps": -82.5478515625, "loss": 0.1382, "objective": 0.14230893552303314, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4749999940395355, "regularize": 0.14230893552303314, "step": 185, "wo_beta": 14.194059371948242 }, { "dpo_loss": 0.6978750824928284, "epoch": 0.538497874350496, "grad_norm": 34.00712851830173, "learning_rate": 9.808328879073251e-07, "logits": -1.2612725496292114, "logps": -81.91997528076172, "loss": 0.1466, "objective": 0.14948724210262299, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5666666626930237, "regularize": 0.14948724210262299, "step": 190, "wo_beta": 16.620363235473633 }, { "dpo_loss": 0.6822370290756226, "epoch": 0.5526688710439301, "grad_norm": 31.586658287520144, "learning_rate": 9.78500156205731e-07, "logits": -1.2822577953338623, "logps": -83.0813217163086, "loss": 0.1319, "objective": 0.13207347691059113, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5333333611488342, "regularize": 0.13207347691059113, "step": 195, "wo_beta": 14.693647384643555 }, { "dpo_loss": 0.7044106721878052, "epoch": 0.5668398677373642, "grad_norm": 30.369620708498754, "learning_rate": 9.760366073392244e-07, "logits": -1.3258157968521118, "logps": -83.32820129394531, "loss": 0.1427, "objective": 0.15046708285808563, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.15046708285808563, "step": 200, "wo_beta": 15.960111618041992 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6959174871444702, "eval_logits": -1.3123745918273926, "eval_logps": -88.34333801269531, "eval_loss": 0.10319730639457703, "eval_objective": 0.10169863700866699, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.10169863700866699, "eval_runtime": 532.3008, "eval_samples_per_second": 10.877, "eval_steps_per_second": 0.907, "eval_wo_beta": 15.992826461791992, "step": 200 }, { "dpo_loss": 0.7000283598899841, "epoch": 0.5810108644307983, "grad_norm": 30.076737378719095, "learning_rate": 9.734429148174674e-07, "logits": -1.2141386270523071, "logps": -82.74073028564453, "loss": 0.1484, "objective": 0.1470470279455185, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.49166667461395264, "regularize": 0.1470470279455185, "step": 205, "wo_beta": 16.118446350097656 }, { "dpo_loss": 0.6862087249755859, "epoch": 0.5951818611242324, "grad_norm": 31.36222267459615, "learning_rate": 9.707197877300973e-07, "logits": -1.2483521699905396, "logps": -82.3885269165039, "loss": 0.1454, "objective": 0.14993111789226532, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5291666388511658, "regularize": 0.14993111789226532, "step": 210, "wo_beta": 15.07961654663086 }, { "dpo_loss": 0.6946883797645569, "epoch": 0.6093528578176665, "grad_norm": 32.25125352651472, "learning_rate": 9.678679705528698e-07, "logits": -1.3168671131134033, "logps": -82.3456039428711, "loss": 0.1384, "objective": 0.14188070595264435, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4791666567325592, "regularize": 0.14188070595264435, "step": 215, "wo_beta": 16.104469299316406 }, { "dpo_loss": 0.7026723027229309, "epoch": 0.6235238545111006, "grad_norm": 30.142053540661294, "learning_rate": 9.648882429441256e-07, "logits": -1.3188337087631226, "logps": -82.63532257080078, "loss": 0.1477, "objective": 0.1607874184846878, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.49166667461395264, "regularize": 0.1607874184846878, "step": 220, "wo_beta": 17.079347610473633 }, { "dpo_loss": 0.6998167634010315, "epoch": 0.6376948512045347, "grad_norm": 29.418648888160003, "learning_rate": 9.61781419531641e-07, "logits": -1.3314566612243652, "logps": -82.72489929199219, "loss": 0.1465, "objective": 0.14282181859016418, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.4958333373069763, "regularize": 0.14282181859016418, "step": 225, "wo_beta": 15.506386756896973 }, { "dpo_loss": 0.7007436156272888, "epoch": 0.6518658478979689, "grad_norm": 31.584769522955447, "learning_rate": 9.585483496899149e-07, "logits": -1.2612279653549194, "logps": -82.21707916259766, "loss": 0.1434, "objective": 0.14342841506004333, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5583333373069763, "regularize": 0.14342841506004333, "step": 230, "wo_beta": 16.431724548339844 }, { "dpo_loss": 0.7085835337638855, "epoch": 0.6660368445914029, "grad_norm": 33.138665174716316, "learning_rate": 9.551899173079606e-07, "logits": -1.2083913087844849, "logps": -84.15171813964844, "loss": 0.1479, "objective": 0.14772751927375793, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5291666388511658, "regularize": 0.14772751927375793, "step": 235, "wo_beta": 15.722906112670898 }, { "dpo_loss": 0.6893501877784729, "epoch": 0.680207841284837, "grad_norm": 28.511782322472136, "learning_rate": 9.517070405476574e-07, "logits": -1.3556396961212158, "logps": -83.491943359375, "loss": 0.1408, "objective": 0.1575685441493988, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5249999761581421, "regularize": 0.1575685441493988, "step": 240, "wo_beta": 15.692626953125 }, { "dpo_loss": 0.6901037693023682, "epoch": 0.6943788379782712, "grad_norm": 28.887977273452503, "learning_rate": 9.481006715927351e-07, "logits": -1.3499360084533691, "logps": -82.59223937988281, "loss": 0.1422, "objective": 0.1397981345653534, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5416666865348816, "regularize": 0.1397981345653534, "step": 245, "wo_beta": 15.627848625183105 }, { "dpo_loss": 0.6898453831672668, "epoch": 0.7085498346717053, "grad_norm": 30.778123472149638, "learning_rate": 9.443717963884568e-07, "logits": -1.1249743700027466, "logps": -81.38602447509766, "loss": 0.1451, "objective": 0.12806275486946106, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.12806275486946106, "step": 250, "wo_beta": 14.860217094421387 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.6950441002845764, "eval_logits": -1.2854480743408203, "eval_logps": -88.06980895996094, "eval_loss": 0.11781599372625351, "eval_objective": 0.11854107677936554, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.11854107677936554, "eval_runtime": 533.5732, "eval_samples_per_second": 10.851, "eval_steps_per_second": 0.905, "eval_wo_beta": 15.787796020507812, "step": 250 }, { "dpo_loss": 0.6893075704574585, "epoch": 0.7227208313651393, "grad_norm": 27.48861543576658, "learning_rate": 9.405214343720706e-07, "logits": -1.3376212120056152, "logps": -81.39327239990234, "loss": 0.1325, "objective": 0.12804514169692993, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.42916667461395264, "ranking_simple": 0.44583332538604736, "regularize": 0.12804514169692993, "step": 255, "wo_beta": 14.828557968139648 }, { "dpo_loss": 0.6919839978218079, "epoch": 0.7368918280585735, "grad_norm": 27.470977695013012, "learning_rate": 9.365506381941065e-07, "logits": -1.3046835660934448, "logps": -83.32947540283203, "loss": 0.1509, "objective": 0.15500593185424805, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4583333432674408, "regularize": 0.15500593185424805, "step": 260, "wo_beta": 15.419398307800293 }, { "dpo_loss": 0.6987964510917664, "epoch": 0.7510628247520076, "grad_norm": 29.786537519342414, "learning_rate": 9.32460493430591e-07, "logits": -1.2736799716949463, "logps": -82.46897888183594, "loss": 0.1444, "objective": 0.14515246450901031, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5708333253860474, "regularize": 0.14515246450901031, "step": 265, "wo_beta": 15.908428192138672 }, { "dpo_loss": 0.6944437026977539, "epoch": 0.7652338214454416, "grad_norm": 28.172549175339846, "learning_rate": 9.282521182862629e-07, "logits": -1.397876262664795, "logps": -82.14982604980469, "loss": 0.1491, "objective": 0.15289539098739624, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.4541666805744171, "regularize": 0.15289539098739624, "step": 270, "wo_beta": 14.118414878845215 }, { "dpo_loss": 0.6878421902656555, "epoch": 0.7794048181388757, "grad_norm": 30.974249065309053, "learning_rate": 9.239266632888658e-07, "logits": -1.265884280204773, "logps": -80.5745849609375, "loss": 0.1429, "objective": 0.13965575397014618, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 0.13965575397014618, "step": 275, "wo_beta": 15.147540092468262 }, { "dpo_loss": 0.693124532699585, "epoch": 0.7935758148323099, "grad_norm": 27.26309671203667, "learning_rate": 9.194853109746072e-07, "logits": -1.317248821258545, "logps": -80.71721649169922, "loss": 0.1422, "objective": 0.13741357624530792, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.13741357624530792, "step": 280, "wo_beta": 15.141572952270508 }, { "dpo_loss": 0.6898981332778931, "epoch": 0.807746811525744, "grad_norm": 29.618387771117387, "learning_rate": 9.14929275564863e-07, "logits": -1.2990264892578125, "logps": -81.34524536132812, "loss": 0.1481, "objective": 0.14202959835529327, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4749999940395355, "regularize": 0.14202959835529327, "step": 285, "wo_beta": 16.715734481811523 }, { "dpo_loss": 0.6989319920539856, "epoch": 0.821917808219178, "grad_norm": 30.35546225687188, "learning_rate": 9.102598026342222e-07, "logits": -1.310984492301941, "logps": -80.47208404541016, "loss": 0.1416, "objective": 0.13658234477043152, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 0.13658234477043152, "step": 290, "wo_beta": 15.537436485290527 }, { "dpo_loss": 0.692668080329895, "epoch": 0.8360888049126122, "grad_norm": 28.386489735858774, "learning_rate": 9.0547816876996e-07, "logits": -1.3056447505950928, "logps": -80.58573913574219, "loss": 0.1335, "objective": 0.14200052618980408, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.48750001192092896, "regularize": 0.14200052618980408, "step": 295, "wo_beta": 15.984179496765137 }, { "dpo_loss": 0.6959550380706787, "epoch": 0.8502598016060463, "grad_norm": 30.90903589796416, "learning_rate": 9.005856812230304e-07, "logits": -1.2770187854766846, "logps": -79.3738784790039, "loss": 0.1305, "objective": 0.12751255929470062, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5541666746139526, "regularize": 0.12751255929470062, "step": 300, "wo_beta": 14.3499755859375 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6960889101028442, "eval_logits": -1.2862635850906372, "eval_logps": -86.33123016357422, "eval_loss": 0.12468627840280533, "eval_objective": 0.1251634955406189, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.1251634955406189, "eval_runtime": 492.3852, "eval_samples_per_second": 11.759, "eval_steps_per_second": 0.981, "eval_wo_beta": 15.766751289367676, "step": 300 }, { "dpo_loss": 0.6915071606636047, "epoch": 0.8644307982994804, "grad_norm": 28.35320542673635, "learning_rate": 8.955836775506775e-07, "logits": -1.2531558275222778, "logps": -80.3687744140625, "loss": 0.1326, "objective": 0.1348031610250473, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5249999761581421, "regularize": 0.1348031610250473, "step": 305, "wo_beta": 15.622274398803711 }, { "dpo_loss": 0.6971884965896606, "epoch": 0.8786017949929145, "grad_norm": 28.116582054859066, "learning_rate": 8.904735252507609e-07, "logits": -1.256584882736206, "logps": -79.94914245605469, "loss": 0.1365, "objective": 0.1369226723909378, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.4958333373069763, "regularize": 0.1369226723909378, "step": 310, "wo_beta": 14.816594123840332 }, { "dpo_loss": 0.6855903267860413, "epoch": 0.8927727916863486, "grad_norm": 29.897768012112312, "learning_rate": 8.852566213878946e-07, "logits": -1.2702066898345947, "logps": -79.8655014038086, "loss": 0.1353, "objective": 0.13145793974399567, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.4749999940395355, "regularize": 0.13145793974399567, "step": 315, "wo_beta": 15.161810874938965 }, { "dpo_loss": 0.691845178604126, "epoch": 0.9069437883797827, "grad_norm": 28.736143424115674, "learning_rate": 8.799343922115043e-07, "logits": -1.2241441011428833, "logps": -82.17134094238281, "loss": 0.13, "objective": 0.1402612328529358, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5541666746139526, "regularize": 0.1402612328529358, "step": 320, "wo_beta": 15.099017143249512 }, { "dpo_loss": 0.6962689161300659, "epoch": 0.9211147850732169, "grad_norm": 28.356303375759392, "learning_rate": 8.745082927659046e-07, "logits": -1.2910945415496826, "logps": -83.30491638183594, "loss": 0.1308, "objective": 0.14350637793540955, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.14350637793540955, "step": 325, "wo_beta": 15.133590698242188 }, { "dpo_loss": 0.6975868344306946, "epoch": 0.9352857817666509, "grad_norm": 29.00689810312343, "learning_rate": 8.689798064925048e-07, "logits": -1.1349345445632935, "logps": -82.04910278320312, "loss": 0.1321, "objective": 0.1296585500240326, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.1296585500240326, "step": 330, "wo_beta": 16.1423282623291 }, { "dpo_loss": 0.7005541920661926, "epoch": 0.949456778460085, "grad_norm": 32.3756572284601, "learning_rate": 8.633504448242504e-07, "logits": -1.149806261062622, "logps": -81.64175415039062, "loss": 0.1375, "objective": 0.1390267014503479, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.1390267014503479, "step": 335, "wo_beta": 15.652006149291992 }, { "dpo_loss": 0.6950960755348206, "epoch": 0.9636277751535192, "grad_norm": 27.122604040368284, "learning_rate": 8.576217467724127e-07, "logits": -1.2132624387741089, "logps": -80.64006042480469, "loss": 0.1292, "objective": 0.12200692296028137, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5416666865348816, "regularize": 0.12200692296028137, "step": 340, "wo_beta": 15.907822608947754 }, { "dpo_loss": 0.6975562572479248, "epoch": 0.9777987718469532, "grad_norm": 27.134170349804087, "learning_rate": 8.517952785058384e-07, "logits": -1.2632955312728882, "logps": -80.71128845214844, "loss": 0.1283, "objective": 0.11938898265361786, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5708333253860474, "regularize": 0.11938898265361786, "step": 345, "wo_beta": 14.762292861938477 }, { "dpo_loss": 0.6852299571037292, "epoch": 0.9919697685403873, "grad_norm": 27.658996359022336, "learning_rate": 8.458726329227747e-07, "logits": -1.1914026737213135, "logps": -81.73149108886719, "loss": 0.1407, "objective": 0.1554519683122635, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.1554519683122635, "step": 350, "wo_beta": 15.107101440429688 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6975587606430054, "eval_logits": -1.2756990194320679, "eval_logps": -86.45014190673828, "eval_loss": 0.13138790428638458, "eval_objective": 0.13096390664577484, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.13096390664577484, "eval_runtime": 498.5276, "eval_samples_per_second": 11.614, "eval_steps_per_second": 0.969, "eval_wo_beta": 15.656978607177734, "step": 350 }, { "dpo_loss": 0.6982021331787109, "epoch": 1.0061407652338215, "grad_norm": 28.652193663332632, "learning_rate": 8.398554292153865e-07, "logits": -1.3350815773010254, "logps": -79.34367370605469, "loss": 0.1274, "objective": 0.1257932186126709, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5333333611488342, "regularize": 0.1257932186126709, "step": 355, "wo_beta": 16.378000259399414 }, { "dpo_loss": 0.6944258809089661, "epoch": 1.0203117619272555, "grad_norm": 27.709591206743504, "learning_rate": 8.337453124270862e-07, "logits": -1.2474267482757568, "logps": -80.31254577636719, "loss": 0.1453, "objective": 0.14443162083625793, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.4958333373069763, "regularize": 0.14443162083625793, "step": 360, "wo_beta": 16.190935134887695 }, { "dpo_loss": 0.6928178071975708, "epoch": 1.0344827586206897, "grad_norm": 31.070681767199403, "learning_rate": 8.275439530027947e-07, "logits": -1.276475191116333, "logps": -80.50602722167969, "loss": 0.1371, "objective": 0.13979977369308472, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5166666507720947, "regularize": 0.13979977369308472, "step": 365, "wo_beta": 14.378859519958496 }, { "dpo_loss": 0.699609637260437, "epoch": 1.0486537553141237, "grad_norm": 30.003574042191506, "learning_rate": 8.212530463322582e-07, "logits": -1.2496185302734375, "logps": -79.11912536621094, "loss": 0.1306, "objective": 0.1423943042755127, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.512499988079071, "regularize": 0.1423943042755127, "step": 370, "wo_beta": 15.124627113342285 }, { "dpo_loss": 0.6967942714691162, "epoch": 1.0628247520075578, "grad_norm": 27.953352635424668, "learning_rate": 8.148743122865463e-07, "logits": -1.3011940717697144, "logps": -80.02760314941406, "loss": 0.1297, "objective": 0.11541719734668732, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5249999761581421, "regularize": 0.11541719734668732, "step": 375, "wo_beta": 15.568713188171387 }, { "dpo_loss": 0.6877638697624207, "epoch": 1.076995748700992, "grad_norm": 27.32675287386393, "learning_rate": 8.084094947478554e-07, "logits": -1.2718795537948608, "logps": -81.57784271240234, "loss": 0.1306, "objective": 0.1403437703847885, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.1403437703847885, "step": 380, "wo_beta": 15.024064064025879 }, { "dpo_loss": 0.7029018402099609, "epoch": 1.091166745394426, "grad_norm": 25.199092121516863, "learning_rate": 8.018603611327504e-07, "logits": -1.2051031589508057, "logps": -80.49242401123047, "loss": 0.1289, "objective": 0.12692388892173767, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5375000238418579, "regularize": 0.12692388892173767, "step": 385, "wo_beta": 15.658522605895996 }, { "dpo_loss": 0.6920034885406494, "epoch": 1.10533774208786, "grad_norm": 28.52425339340298, "learning_rate": 7.952287019089685e-07, "logits": -1.1542584896087646, "logps": -82.2014389038086, "loss": 0.128, "objective": 0.13050222396850586, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5666666626930237, "regularize": 0.13050222396850586, "step": 390, "wo_beta": 15.990551948547363 }, { "dpo_loss": 0.6983939409255981, "epoch": 1.1195087387812943, "grad_norm": 28.676328293583875, "learning_rate": 7.88516330105925e-07, "logits": -1.21431303024292, "logps": -81.3152847290039, "loss": 0.1271, "objective": 0.12024066597223282, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 0.12024066597223282, "step": 395, "wo_beta": 14.856566429138184 }, { "dpo_loss": 0.7045825719833374, "epoch": 1.1336797354747283, "grad_norm": 26.361954924055155, "learning_rate": 7.817250808190483e-07, "logits": -1.2783249616622925, "logps": -79.67323303222656, "loss": 0.1245, "objective": 0.12074790149927139, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5416666865348816, "regularize": 0.12074790149927139, "step": 400, "wo_beta": 15.344539642333984 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.698018491268158, "eval_logits": -1.2417831420898438, "eval_logps": -86.2849349975586, "eval_loss": 0.13988268375396729, "eval_objective": 0.13904725015163422, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.13904725015163422, "eval_runtime": 544.4211, "eval_samples_per_second": 10.635, "eval_steps_per_second": 0.887, "eval_wo_beta": 15.614696502685547, "step": 400 }, { "dpo_loss": 0.6944829225540161, "epoch": 1.1478507321681626, "grad_norm": 26.829396266860115, "learning_rate": 7.74856810708083e-07, "logits": -1.2358256578445435, "logps": -80.91136169433594, "loss": 0.1252, "objective": 0.13733495771884918, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46666666865348816, "regularize": 0.13733495771884918, "step": 405, "wo_beta": 16.799846649169922 }, { "dpo_loss": 0.6951694488525391, "epoch": 1.1620217288615966, "grad_norm": 25.84880624163644, "learning_rate": 7.679133974894982e-07, "logits": -1.2413955926895142, "logps": -80.84453582763672, "loss": 0.1146, "objective": 0.10967493802309036, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.512499988079071, "regularize": 0.10967493802309036, "step": 410, "wo_beta": 16.299657821655273 }, { "dpo_loss": 0.6816955804824829, "epoch": 1.1761927255550306, "grad_norm": 28.539266676030703, "learning_rate": 7.608967394231386e-07, "logits": -1.1460075378417969, "logps": -80.07962799072266, "loss": 0.1201, "objective": 0.11568634957075119, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.512499988079071, "regularize": 0.11568634957075119, "step": 415, "wo_beta": 15.849366188049316 }, { "dpo_loss": 0.6958954334259033, "epoch": 1.1903637222484649, "grad_norm": 26.83226072322417, "learning_rate": 7.538087547932584e-07, "logits": -1.1252403259277344, "logps": -80.94552612304688, "loss": 0.1212, "objective": 0.11827482283115387, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5083333253860474, "regularize": 0.11827482283115387, "step": 420, "wo_beta": 16.14940643310547 }, { "dpo_loss": 0.6887015700340271, "epoch": 1.204534718941899, "grad_norm": 26.51780573149761, "learning_rate": 7.466513813840824e-07, "logits": -1.1933962106704712, "logps": -78.89797973632812, "loss": 0.1135, "objective": 0.1143736019730568, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5166666507720947, "regularize": 0.1143736019730568, "step": 425, "wo_beta": 14.578470230102539 }, { "dpo_loss": 0.6991615891456604, "epoch": 1.2187057156353331, "grad_norm": 27.122286588814305, "learning_rate": 7.394265759500347e-07, "logits": -1.1930339336395264, "logps": -80.126220703125, "loss": 0.1127, "objective": 0.11676573753356934, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.4749999940395355, "regularize": 0.11676573753356934, "step": 430, "wo_beta": 15.9819974899292 }, { "dpo_loss": 0.6940677762031555, "epoch": 1.2328767123287672, "grad_norm": 26.542064973728884, "learning_rate": 7.321363136807818e-07, "logits": -1.1478148698806763, "logps": -80.018310546875, "loss": 0.1273, "objective": 0.12024448066949844, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.6000000238418579, "regularize": 0.12024448066949844, "step": 435, "wo_beta": 17.044919967651367 }, { "dpo_loss": 0.6969379186630249, "epoch": 1.2470477090222012, "grad_norm": 28.251093862423456, "learning_rate": 7.247825876612352e-07, "logits": -1.1687721014022827, "logps": -79.19255828857422, "loss": 0.1253, "objective": 0.12027813494205475, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.550000011920929, "regularize": 0.12027813494205475, "step": 440, "wo_beta": 14.654241561889648 }, { "dpo_loss": 0.6925280094146729, "epoch": 1.2612187057156352, "grad_norm": 26.524957115429544, "learning_rate": 7.173674083266623e-07, "logits": -1.1623238325119019, "logps": -80.57234191894531, "loss": 0.1123, "objective": 0.11110406368970871, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5416666865348816, "regularize": 0.11110406368970871, "step": 445, "wo_beta": 16.533472061157227 }, { "dpo_loss": 0.6959200501441956, "epoch": 1.2753897024090695, "grad_norm": 25.817109114436615, "learning_rate": 7.098928029130528e-07, "logits": -1.2953335046768188, "logps": -80.17058563232422, "loss": 0.1163, "objective": 0.11630918085575104, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.11630918085575104, "step": 450, "wo_beta": 14.598237991333008 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.6984797716140747, "eval_logits": -1.2306897640228271, "eval_logps": -85.48281860351562, "eval_loss": 0.14205217361450195, "eval_objective": 0.14207439124584198, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.14207439124584198, "eval_runtime": 502.3414, "eval_samples_per_second": 11.526, "eval_steps_per_second": 0.961, "eval_wo_beta": 15.61281681060791, "step": 450 }, { "dpo_loss": 0.6977149248123169, "epoch": 1.2895606991025035, "grad_norm": 29.01794849451687, "learning_rate": 7.023608149028936e-07, "logits": -1.1321525573730469, "logps": -79.79704284667969, "loss": 0.1102, "objective": 0.10798730701208115, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.4958333373069763, "regularize": 0.10798730701208115, "step": 455, "wo_beta": 14.988642692565918 }, { "dpo_loss": 0.6960881352424622, "epoch": 1.3037316957959377, "grad_norm": 25.82316278857825, "learning_rate": 6.947735034665001e-07, "logits": -1.2272473573684692, "logps": -79.4093246459961, "loss": 0.1071, "objective": 0.10132616013288498, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48750001192092896, "regularize": 0.10132616013288498, "step": 460, "wo_beta": 15.888258934020996 }, { "dpo_loss": 0.6906372308731079, "epoch": 1.3179026924893718, "grad_norm": 30.635018246102483, "learning_rate": 6.871329428990601e-07, "logits": -1.2102056741714478, "logps": -78.2228775024414, "loss": 0.1131, "objective": 0.11604170501232147, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46666666865348816, "regularize": 0.11604170501232147, "step": 465, "wo_beta": 14.311129570007324 }, { "dpo_loss": 0.700882077217102, "epoch": 1.3320736891828058, "grad_norm": 27.46778566417897, "learning_rate": 6.794412220535425e-07, "logits": -1.2833130359649658, "logps": -77.55262756347656, "loss": 0.108, "objective": 0.10955775529146194, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5083333253860474, "regularize": 0.10955775529146194, "step": 470, "wo_beta": 14.30273151397705 }, { "dpo_loss": 0.6947088241577148, "epoch": 1.34624468587624, "grad_norm": 27.567991845029866, "learning_rate": 6.717004437696249e-07, "logits": -1.1878196001052856, "logps": -79.7737808227539, "loss": 0.1143, "objective": 0.10682200640439987, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5708333253860474, "regularize": 0.10682200640439987, "step": 475, "wo_beta": 16.000301361083984 }, { "dpo_loss": 0.7012575268745422, "epoch": 1.360415682569674, "grad_norm": 26.96292751307233, "learning_rate": 6.639127242987987e-07, "logits": -1.2194726467132568, "logps": -79.7364730834961, "loss": 0.1121, "objective": 0.10879840701818466, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4958333373069763, "regularize": 0.10879840701818466, "step": 480, "wo_beta": 17.723169326782227 }, { "dpo_loss": 0.6902076005935669, "epoch": 1.3745866792631083, "grad_norm": 25.536217139623062, "learning_rate": 6.560801927258079e-07, "logits": -1.2140812873840332, "logps": -77.77493286132812, "loss": 0.1063, "objective": 0.10283537954092026, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.4958333373069763, "regularize": 0.10283537954092026, "step": 485, "wo_beta": 16.162378311157227 }, { "dpo_loss": 0.7003743648529053, "epoch": 1.3887576759565423, "grad_norm": 25.340128312194377, "learning_rate": 6.482049903865768e-07, "logits": -1.1755324602127075, "logps": -80.6698226928711, "loss": 0.1065, "objective": 0.11661101877689362, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5375000238418579, "regularize": 0.11661101877689362, "step": 490, "wo_beta": 15.291964530944824 }, { "dpo_loss": 0.6933376789093018, "epoch": 1.4029286726499763, "grad_norm": 27.42214588210337, "learning_rate": 6.402892702827916e-07, "logits": -1.203405499458313, "logps": -81.71482849121094, "loss": 0.1083, "objective": 0.11117922514677048, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4833333194255829, "regularize": 0.11117922514677048, "step": 495, "wo_beta": 15.19780445098877 }, { "dpo_loss": 0.6919417977333069, "epoch": 1.4170996693434104, "grad_norm": 25.59099749967404, "learning_rate": 6.323351964932908e-07, "logits": -1.1464035511016846, "logps": -80.67649841308594, "loss": 0.1071, "objective": 0.10751333087682724, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.49166667461395264, "regularize": 0.10751333087682724, "step": 500, "wo_beta": 14.786382675170898 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6979657411575317, "eval_logits": -1.2270138263702393, "eval_logps": -87.26725006103516, "eval_loss": 0.13817694783210754, "eval_objective": 0.1376110315322876, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5284678936004639, "eval_regularize": 0.1376110315322876, "eval_runtime": 507.9058, "eval_samples_per_second": 11.4, "eval_steps_per_second": 0.951, "eval_wo_beta": 15.64445686340332, "step": 500 }, { "dpo_loss": 0.6891559362411499, "epoch": 1.4312706660368446, "grad_norm": 24.27122577359571, "learning_rate": 6.243449435824276e-07, "logits": -1.2177590131759644, "logps": -81.35147094726562, "loss": 0.1101, "objective": 0.1094871535897255, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5333333611488342, "regularize": 0.1094871535897255, "step": 505, "wo_beta": 15.79046630859375 }, { "dpo_loss": 0.6941244602203369, "epoch": 1.4454416627302786, "grad_norm": 25.930769694740054, "learning_rate": 6.163206960055652e-07, "logits": -1.251134991645813, "logps": -83.10639953613281, "loss": 0.1006, "objective": 0.09994279593229294, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5291666388511658, "regularize": 0.09994279593229294, "step": 510, "wo_beta": 14.899516105651855 }, { "dpo_loss": 0.6874905824661255, "epoch": 1.4596126594237129, "grad_norm": 26.602314880639124, "learning_rate": 6.082646475118699e-07, "logits": -1.2633229494094849, "logps": -84.02688598632812, "loss": 0.106, "objective": 0.10199037194252014, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4541666805744171, "regularize": 0.10199037194252014, "step": 515, "wo_beta": 15.941681861877441 }, { "dpo_loss": 0.6967552900314331, "epoch": 1.473783656117147, "grad_norm": 29.852612268822412, "learning_rate": 6.001790005445606e-07, "logits": -1.184912919998169, "logps": -80.95891571044922, "loss": 0.1071, "objective": 0.10300089418888092, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.49166667461395264, "regularize": 0.10300089418888092, "step": 520, "wo_beta": 15.731270790100098 }, { "dpo_loss": 0.6896428465843201, "epoch": 1.487954652810581, "grad_norm": 25.111572790175902, "learning_rate": 5.920659656387836e-07, "logits": -1.0910202264785767, "logps": -79.46784973144531, "loss": 0.1087, "objective": 0.10289794951677322, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.10289794951677322, "step": 525, "wo_beta": 14.915215492248535 }, { "dpo_loss": 0.6884135603904724, "epoch": 1.5021256495040152, "grad_norm": 29.155836377588727, "learning_rate": 5.839277608172738e-07, "logits": -1.2429722547531128, "logps": -82.09452056884766, "loss": 0.1052, "objective": 0.11081438511610031, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5083333253860474, "regularize": 0.11081438511610031, "step": 530, "wo_beta": 14.850537300109863 }, { "dpo_loss": 0.6961663961410522, "epoch": 1.5162966461974492, "grad_norm": 25.320925581209725, "learning_rate": 5.757666109839702e-07, "logits": -1.2323859930038452, "logps": -80.30747985839844, "loss": 0.1001, "objective": 0.09293892234563828, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 0.09293892234563828, "step": 535, "wo_beta": 15.262944221496582 }, { "dpo_loss": 0.6923481225967407, "epoch": 1.5304676428908834, "grad_norm": 32.01848958383342, "learning_rate": 5.675847473157485e-07, "logits": -1.1209362745285034, "logps": -80.81604766845703, "loss": 0.1017, "objective": 0.1114068478345871, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.1114068478345871, "step": 540, "wo_beta": 14.910977363586426 }, { "dpo_loss": 0.6916370987892151, "epoch": 1.5446386395843175, "grad_norm": 25.57319909143034, "learning_rate": 5.5938440665244e-07, "logits": -1.2216829061508179, "logps": -81.30005645751953, "loss": 0.1016, "objective": 0.09744974970817566, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5791666507720947, "regularize": 0.09744974970817566, "step": 545, "wo_beta": 14.310770988464355 }, { "dpo_loss": 0.6908753514289856, "epoch": 1.5588096362777515, "grad_norm": 25.722462769354692, "learning_rate": 5.511678308853025e-07, "logits": -1.2278273105621338, "logps": -81.18257141113281, "loss": 0.1045, "objective": 0.11294317990541458, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.44583332538604736, "regularize": 0.11294317990541458, "step": 550, "wo_beta": 14.909473419189453 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.6977279186248779, "eval_logits": -1.2327359914779663, "eval_logps": -87.07755279541016, "eval_loss": 0.1427639275789261, "eval_objective": 0.14261718094348907, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 0.14261718094348907, "eval_runtime": 530.6929, "eval_samples_per_second": 10.91, "eval_steps_per_second": 0.91, "eval_wo_beta": 15.58066177368164, "step": 550 }, { "dpo_loss": 0.6979414820671082, "epoch": 1.5729806329711855, "grad_norm": 25.97117042381748, "learning_rate": 5.429372663441085e-07, "logits": -1.0773119926452637, "logps": -80.85298919677734, "loss": 0.0969, "objective": 0.10372842103242874, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5083333253860474, "regularize": 0.10372842103242874, "step": 555, "wo_beta": 14.278889656066895 }, { "dpo_loss": 0.6877902746200562, "epoch": 1.5871516296646198, "grad_norm": 26.089498554586406, "learning_rate": 5.34694963183022e-07, "logits": -1.149969220161438, "logps": -80.23606872558594, "loss": 0.0937, "objective": 0.0943736732006073, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.0943736732006073, "step": 560, "wo_beta": 15.772320747375488 }, { "dpo_loss": 0.6947767734527588, "epoch": 1.601322626358054, "grad_norm": 26.116747650931945, "learning_rate": 5.264431747654283e-07, "logits": -1.1340062618255615, "logps": -81.63863372802734, "loss": 0.0947, "objective": 0.10096503049135208, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4416666626930237, "regularize": 0.10096503049135208, "step": 565, "wo_beta": 14.981669425964355 }, { "dpo_loss": 0.692035436630249, "epoch": 1.615493623051488, "grad_norm": 30.63214010200871, "learning_rate": 5.181841570478872e-07, "logits": -1.2694156169891357, "logps": -81.64689636230469, "loss": 0.0959, "objective": 0.1027316302061081, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5375000238418579, "regularize": 0.1027316302061081, "step": 570, "wo_beta": 15.00640869140625 }, { "dpo_loss": 0.6928724646568298, "epoch": 1.629664619744922, "grad_norm": 26.684109688489027, "learning_rate": 5.099201679633768e-07, "logits": -1.219287633895874, "logps": -79.6671371459961, "loss": 0.0902, "objective": 0.08943381905555725, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5458333492279053, "regularize": 0.08943381905555725, "step": 575, "wo_beta": 15.883743286132812 }, { "dpo_loss": 0.7014293670654297, "epoch": 1.643835616438356, "grad_norm": 23.659415037737205, "learning_rate": 5.016534668039976e-07, "logits": -1.245025396347046, "logps": -79.65864562988281, "loss": 0.0922, "objective": 0.09364978969097137, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5375000238418579, "regularize": 0.09364978969097137, "step": 580, "wo_beta": 14.111478805541992 }, { "dpo_loss": 0.6919021606445312, "epoch": 1.6580066131317903, "grad_norm": 26.81667336982406, "learning_rate": 4.933863136033039e-07, "logits": -1.1303011178970337, "logps": -79.01573944091797, "loss": 0.0912, "objective": 0.09164983779191971, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4749999940395355, "regularize": 0.09164983779191971, "step": 585, "wo_beta": 15.943554878234863 }, { "dpo_loss": 0.6958838701248169, "epoch": 1.6721776098252243, "grad_norm": 24.98087975104312, "learning_rate": 4.851209685184338e-07, "logits": -1.1811211109161377, "logps": -78.23771667480469, "loss": 0.0896, "objective": 0.08815690129995346, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.49166667461395264, "regularize": 0.08815690129995346, "step": 590, "wo_beta": 13.053691864013672 }, { "dpo_loss": 0.7018415927886963, "epoch": 1.6863486065186586, "grad_norm": 28.222712089048155, "learning_rate": 4.768596912122045e-07, "logits": -1.1410056352615356, "logps": -78.93828582763672, "loss": 0.0867, "objective": 0.08855770528316498, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5208333134651184, "regularize": 0.08855770528316498, "step": 595, "wo_beta": 16.56429672241211 }, { "dpo_loss": 0.6934791803359985, "epoch": 1.7005196032120926, "grad_norm": 26.368906194308657, "learning_rate": 4.686047402353433e-07, "logits": -1.1907525062561035, "logps": -80.13634490966797, "loss": 0.0866, "objective": 0.09509587287902832, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5375000238418579, "regularize": 0.09509587287902832, "step": 600, "wo_beta": 15.167766571044922 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6965176463127136, "eval_logits": -1.2196165323257446, "eval_logps": -85.19258880615234, "eval_loss": 0.14236733317375183, "eval_objective": 0.14079627394676208, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.14079627394676208, "eval_runtime": 531.3996, "eval_samples_per_second": 10.896, "eval_steps_per_second": 0.909, "eval_wo_beta": 15.660321235656738, "step": 600 }, { "dpo_loss": 0.6949159502983093, "epoch": 1.7146905999055266, "grad_norm": 27.65546942935795, "learning_rate": 4.60358372409022e-07, "logits": -1.135356068611145, "logps": -80.58204650878906, "loss": 0.0851, "objective": 0.10239014774560928, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5166666507720947, "regularize": 0.10239014774560928, "step": 605, "wo_beta": 17.19474220275879 }, { "dpo_loss": 0.6975926160812378, "epoch": 1.7288615965989607, "grad_norm": 27.123580050770954, "learning_rate": 4.521228422078649e-07, "logits": -1.2206453084945679, "logps": -78.68167877197266, "loss": 0.0882, "objective": 0.0891619473695755, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.5791666507720947, "regularize": 0.0891619473695755, "step": 610, "wo_beta": 15.383539199829102 }, { "dpo_loss": 0.6973095536231995, "epoch": 1.743032593292395, "grad_norm": 27.24577954816879, "learning_rate": 4.439004011435979e-07, "logits": -1.2362395524978638, "logps": -79.0839614868164, "loss": 0.0875, "objective": 0.08598390221595764, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5166666507720947, "regularize": 0.08598390221595764, "step": 615, "wo_beta": 15.571494102478027 }, { "dpo_loss": 0.6836999654769897, "epoch": 1.7572035899858292, "grad_norm": 26.787162425144906, "learning_rate": 4.3569329714950703e-07, "logits": -1.2427488565444946, "logps": -79.54029846191406, "loss": 0.0838, "objective": 0.08879180997610092, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.08879180997610092, "step": 620, "wo_beta": 15.718174934387207 }, { "dpo_loss": 0.6966572999954224, "epoch": 1.7713745866792632, "grad_norm": 27.034118419678652, "learning_rate": 4.275037739658771e-07, "logits": -1.1582579612731934, "logps": -78.85964965820312, "loss": 0.0817, "objective": 0.08299548178911209, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.08299548178911209, "step": 625, "wo_beta": 14.923952102661133 }, { "dpo_loss": 0.688913881778717, "epoch": 1.7855455833726972, "grad_norm": 25.951621085094303, "learning_rate": 4.193340705265745e-07, "logits": -1.1893038749694824, "logps": -80.92503356933594, "loss": 0.0785, "objective": 0.08198042213916779, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.48750001192092896, "regularize": 0.08198042213916779, "step": 630, "wo_beta": 15.90516185760498 }, { "dpo_loss": 0.6924195885658264, "epoch": 1.7997165800661312, "grad_norm": 26.232506833263244, "learning_rate": 4.1118642034694565e-07, "logits": -1.2785860300064087, "logps": -79.61809539794922, "loss": 0.0829, "objective": 0.08000766485929489, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5416666865348816, "regularize": 0.08000766485929489, "step": 635, "wo_beta": 15.796289443969727 }, { "dpo_loss": 0.6959947943687439, "epoch": 1.8138875767595655, "grad_norm": 27.993983855367574, "learning_rate": 4.030630509131959e-07, "logits": -1.2194859981536865, "logps": -80.71635437011719, "loss": 0.0842, "objective": 0.089814692735672, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5, "regularize": 0.089814692735672, "step": 640, "wo_beta": 16.932401657104492 }, { "dpo_loss": 0.6927257776260376, "epoch": 1.8280585734529995, "grad_norm": 28.107934645205802, "learning_rate": 3.9496618307341713e-07, "logits": -1.256467342376709, "logps": -81.03665161132812, "loss": 0.0853, "objective": 0.0889531597495079, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5541666746139526, "regularize": 0.0889531597495079, "step": 645, "wo_beta": 15.800675392150879 }, { "dpo_loss": 0.6969668865203857, "epoch": 1.8422295701464337, "grad_norm": 27.015977070193543, "learning_rate": 3.8689803043042996e-07, "logits": -1.2903110980987549, "logps": -80.92781829833984, "loss": 0.0847, "objective": 0.0801667794585228, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5249999761581421, "regularize": 0.0801667794585228, "step": 650, "wo_beta": 15.173321723937988 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.6974130868911743, "eval_logits": -1.2229208946228027, "eval_logps": -86.1129150390625, "eval_loss": 0.1379525512456894, "eval_objective": 0.13563887774944305, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.13563887774944305, "eval_runtime": 538.8083, "eval_samples_per_second": 10.746, "eval_steps_per_second": 0.896, "eval_wo_beta": 15.666037559509277, "step": 650 }, { "dpo_loss": 0.6896820068359375, "epoch": 1.8564005668398678, "grad_norm": 26.639855046988597, "learning_rate": 3.788607987366069e-07, "logits": -1.1662521362304688, "logps": -78.9451675415039, "loss": 0.081, "objective": 0.08504978567361832, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4958333373069763, "regularize": 0.08504978567361832, "step": 655, "wo_beta": 15.233590126037598 }, { "dpo_loss": 0.6891672611236572, "epoch": 1.8705715635333018, "grad_norm": 25.995274477757608, "learning_rate": 3.708566852908418e-07, "logits": -1.2193191051483154, "logps": -81.20162200927734, "loss": 0.0781, "objective": 0.08211526274681091, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5291666388511658, "regularize": 0.08211526274681091, "step": 660, "wo_beta": 16.372514724731445 }, { "dpo_loss": 0.6908305287361145, "epoch": 1.8847425602267358, "grad_norm": 28.66146531985666, "learning_rate": 3.6288787833783016e-07, "logits": -1.2218626737594604, "logps": -80.04493713378906, "loss": 0.0815, "objective": 0.08463230729103088, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5416666865348816, "regularize": 0.08463230729103088, "step": 665, "wo_beta": 15.034836769104004 }, { "dpo_loss": 0.6863933801651001, "epoch": 1.89891355692017, "grad_norm": 30.111613598581105, "learning_rate": 3.5495655646982503e-07, "logits": -1.1576950550079346, "logps": -78.58309173583984, "loss": 0.0755, "objective": 0.07363765686750412, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.47083333134651184, "regularize": 0.07363765686750412, "step": 670, "wo_beta": 16.09279441833496 }, { "dpo_loss": 0.695208728313446, "epoch": 1.9130845536136043, "grad_norm": 27.241452477717303, "learning_rate": 3.470648880310313e-07, "logits": -1.1648114919662476, "logps": -79.5347671508789, "loss": 0.0735, "objective": 0.07240771502256393, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5249999761581421, "regularize": 0.07240771502256393, "step": 675, "wo_beta": 15.440892219543457 }, { "dpo_loss": 0.6950518488883972, "epoch": 1.9272555503070383, "grad_norm": 25.198332305215366, "learning_rate": 3.3921503052480236e-07, "logits": -1.2177760601043701, "logps": -81.27088165283203, "loss": 0.0778, "objective": 0.07866664230823517, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5083333253860474, "regularize": 0.07866664230823517, "step": 680, "wo_beta": 14.848203659057617 }, { "dpo_loss": 0.6911803483963013, "epoch": 1.9414265470004723, "grad_norm": 26.10726119743999, "learning_rate": 3.314091300237999e-07, "logits": -1.1625895500183105, "logps": -78.15774536132812, "loss": 0.0738, "objective": 0.07330299913883209, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4749999940395355, "regularize": 0.07330299913883209, "step": 685, "wo_beta": 15.467205047607422 }, { "dpo_loss": 0.6860196590423584, "epoch": 1.9555975436939064, "grad_norm": 26.093926175967837, "learning_rate": 3.236493205832794e-07, "logits": -1.21792733669281, "logps": -79.12659454345703, "loss": 0.071, "objective": 0.07433832436800003, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5458333492279053, "regularize": 0.07433832436800003, "step": 690, "wo_beta": 15.627902030944824 }, { "dpo_loss": 0.6922653317451477, "epoch": 1.9697685403873406, "grad_norm": 26.38661425001647, "learning_rate": 3.15937723657661e-07, "logits": -1.1168206930160522, "logps": -79.83128356933594, "loss": 0.0723, "objective": 0.06720028072595596, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.49166667461395264, "regularize": 0.06720028072595596, "step": 695, "wo_beta": 16.023746490478516 }, { "dpo_loss": 0.6856616139411926, "epoch": 1.9839395370807746, "grad_norm": 27.380948849082866, "learning_rate": 3.082764475205442e-07, "logits": -1.103851079940796, "logps": -80.37809753417969, "loss": 0.071, "objective": 0.0717112347483635, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5166666507720947, "regularize": 0.0717112347483635, "step": 700, "wo_beta": 14.657614707946777 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6979688405990601, "eval_logits": -1.220837116241455, "eval_logps": -85.24955749511719, "eval_loss": 0.1420368105173111, "eval_objective": 0.14046597480773926, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 0.14046597480773926, "eval_runtime": 531.1966, "eval_samples_per_second": 10.9, "eval_steps_per_second": 0.909, "eval_wo_beta": 15.610904693603516, "step": 700 }, { "dpo_loss": 0.6904897093772888, "epoch": 1.9981105337742089, "grad_norm": 27.03253447324609, "learning_rate": 3.006675866883275e-07, "logits": -1.0365864038467407, "logps": -79.36177062988281, "loss": 0.0704, "objective": 0.07408583164215088, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.07408583164215088, "step": 705, "wo_beta": 16.758014678955078 }, { "dpo_loss": 0.6849521398544312, "epoch": 2.012281530467643, "grad_norm": 24.956147004394822, "learning_rate": 2.931132213475884e-07, "logits": -1.1888701915740967, "logps": -78.96455383300781, "loss": 0.0619, "objective": 0.06422288715839386, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.06422288715839386, "step": 710, "wo_beta": 15.760772705078125 }, { "dpo_loss": 0.6961538791656494, "epoch": 2.026452527161077, "grad_norm": 27.279846270487834, "learning_rate": 2.856154167863814e-07, "logits": -1.1860238313674927, "logps": -78.40641021728516, "loss": 0.0631, "objective": 0.06441039592027664, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.550000011920929, "regularize": 0.06441039592027664, "step": 715, "wo_beta": 14.784539222717285 }, { "dpo_loss": 0.6898289322853088, "epoch": 2.040623523854511, "grad_norm": 28.14233189102926, "learning_rate": 2.7817622282960813e-07, "logits": -1.1884685754776, "logps": -79.12120819091797, "loss": 0.0633, "objective": 0.06231885775923729, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.06231885775923729, "step": 720, "wo_beta": 15.765007972717285 }, { "dpo_loss": 0.6928841471672058, "epoch": 2.0547945205479454, "grad_norm": 25.78908501428665, "learning_rate": 2.707976732786166e-07, "logits": -1.1958059072494507, "logps": -81.6028060913086, "loss": 0.0578, "objective": 0.06330116838216782, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.06330116838216782, "step": 725, "wo_beta": 13.992663383483887 }, { "dpo_loss": 0.6905084252357483, "epoch": 2.0689655172413794, "grad_norm": 26.140030636203093, "learning_rate": 2.6348178535517965e-07, "logits": -1.2607707977294922, "logps": -79.21609497070312, "loss": 0.0598, "objective": 0.05353347584605217, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.4958333373069763, "regularize": 0.05353347584605217, "step": 730, "wo_beta": 15.095206260681152 }, { "dpo_loss": 0.6901918053627014, "epoch": 2.0831365139348135, "grad_norm": 27.302640565922513, "learning_rate": 2.5623055915000686e-07, "logits": -1.1885894536972046, "logps": -78.86723327636719, "loss": 0.0579, "objective": 0.05939151346683502, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.4833333194255829, "regularize": 0.05939151346683502, "step": 735, "wo_beta": 16.905290603637695 }, { "dpo_loss": 0.6903797388076782, "epoch": 2.0973075106282475, "grad_norm": 25.718973789328345, "learning_rate": 2.490459770759398e-07, "logits": -1.2478386163711548, "logps": -79.14292907714844, "loss": 0.0573, "objective": 0.05540405213832855, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5333333611488342, "regularize": 0.05540405213832855, "step": 740, "wo_beta": 15.3594331741333 }, { "dpo_loss": 0.6952056288719177, "epoch": 2.1114785073216815, "grad_norm": 26.12282917762503, "learning_rate": 2.419300033259798e-07, "logits": -1.1640416383743286, "logps": -79.09960174560547, "loss": 0.0628, "objective": 0.0631415918469429, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5541666746139526, "regularize": 0.0631415918469429, "step": 745, "wo_beta": 14.359167098999023 }, { "dpo_loss": 0.6888077259063721, "epoch": 2.1256495040151155, "grad_norm": 25.53259897003242, "learning_rate": 2.3488458333629773e-07, "logits": -1.2182810306549072, "logps": -78.26011657714844, "loss": 0.0546, "objective": 0.05781084671616554, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 0.05781084671616554, "step": 750, "wo_beta": 15.271900177001953 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.6980140209197998, "eval_logits": -1.2232871055603027, "eval_logps": -85.46907806396484, "eval_loss": 0.14231154322624207, "eval_objective": 0.14071756601333618, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.14071756601333618, "eval_runtime": 525.9214, "eval_samples_per_second": 11.009, "eval_steps_per_second": 0.918, "eval_wo_beta": 15.648022651672363, "step": 750 }, { "dpo_loss": 0.6901395320892334, "epoch": 2.13982050070855, "grad_norm": 25.775133405076527, "learning_rate": 2.2791164325437046e-07, "logits": -1.2039532661437988, "logps": -80.76856994628906, "loss": 0.0536, "objective": 0.054485421627759933, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5249999761581421, "regularize": 0.054485421627759933, "step": 755, "wo_beta": 16.363035202026367 }, { "dpo_loss": 0.6922858953475952, "epoch": 2.153991497401984, "grad_norm": 25.744794188993545, "learning_rate": 2.21013089412392e-07, "logits": -1.1505485773086548, "logps": -77.95565795898438, "loss": 0.0596, "objective": 0.056366052478551865, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5, "regularize": 0.056366052478551865, "step": 760, "wo_beta": 14.503907203674316 }, { "dpo_loss": 0.6935012936592102, "epoch": 2.168162494095418, "grad_norm": 25.81314805277084, "learning_rate": 2.1419080780610122e-07, "logits": -1.195157527923584, "logps": -79.0260009765625, "loss": 0.0569, "objective": 0.05813807621598244, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.05813807621598244, "step": 765, "wo_beta": 15.846463203430176 }, { "dpo_loss": 0.6929753422737122, "epoch": 2.182333490788852, "grad_norm": 26.17366253681256, "learning_rate": 2.0744666357916925e-07, "logits": -1.2156563997268677, "logps": -79.0594253540039, "loss": 0.0599, "objective": 0.06166267395019531, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5083333253860474, "regularize": 0.06166267395019531, "step": 770, "wo_beta": 13.665863037109375 }, { "dpo_loss": 0.6904846429824829, "epoch": 2.196504487482286, "grad_norm": 24.80909315966262, "learning_rate": 2.0078250051328782e-07, "logits": -1.20059072971344, "logps": -79.86570739746094, "loss": 0.0593, "objective": 0.05707041174173355, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.574999988079071, "regularize": 0.05707041174173355, "step": 775, "wo_beta": 16.53993797302246 }, { "dpo_loss": 0.6920241117477417, "epoch": 2.21067548417572, "grad_norm": 26.21741329158667, "learning_rate": 1.942001405240979e-07, "logits": -1.1453113555908203, "logps": -79.6847152709961, "loss": 0.0544, "objective": 0.05578133091330528, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.44583332538604736, "ranking_simple": 0.4375, "regularize": 0.05578133091330528, "step": 780, "wo_beta": 15.170312881469727 }, { "dpo_loss": 0.6935942769050598, "epoch": 2.2248464808691546, "grad_norm": 28.321911906643972, "learning_rate": 1.877013831630961e-07, "logits": -1.1368038654327393, "logps": -79.92477416992188, "loss": 0.0563, "objective": 0.0578266978263855, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4791666567325592, "regularize": 0.0578266978263855, "step": 785, "wo_beta": 14.784603118896484 }, { "dpo_loss": 0.6887209415435791, "epoch": 2.2390174775625886, "grad_norm": 25.1538491328267, "learning_rate": 1.812880051256551e-07, "logits": -1.1384888887405396, "logps": -80.59889221191406, "loss": 0.0504, "objective": 0.04905276745557785, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.550000011920929, "regularize": 0.04905276745557785, "step": 790, "wo_beta": 14.593072891235352 }, { "dpo_loss": 0.6941591501235962, "epoch": 2.2531884742560226, "grad_norm": 25.628975208912717, "learning_rate": 1.7496175976529337e-07, "logits": -1.1934906244277954, "logps": -81.73139953613281, "loss": 0.053, "objective": 0.05859142541885376, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5, "regularize": 0.05859142541885376, "step": 795, "wo_beta": 13.79269790649414 }, { "dpo_loss": 0.6919682621955872, "epoch": 2.2673594709494567, "grad_norm": 26.293732850411818, "learning_rate": 1.6872437661432516e-07, "logits": -1.2084691524505615, "logps": -80.88973999023438, "loss": 0.0531, "objective": 0.05279294773936272, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.05279294773936272, "step": 800, "wo_beta": 14.340437889099121 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6981291174888611, "eval_logits": -1.220612645149231, "eval_logps": -86.13679504394531, "eval_loss": 0.138593852519989, "eval_objective": 0.13714565336704254, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.13714565336704254, "eval_runtime": 503.3749, "eval_samples_per_second": 11.502, "eval_steps_per_second": 0.96, "eval_wo_beta": 15.623366355895996, "step": 800 }, { "dpo_loss": 0.6895002126693726, "epoch": 2.2815304676428907, "grad_norm": 26.85852458075238, "learning_rate": 1.62577560911024e-07, "logits": -1.1975409984588623, "logps": -79.75126647949219, "loss": 0.0473, "objective": 0.047933317720890045, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5166666507720947, "regularize": 0.047933317720890045, "step": 805, "wo_beta": 15.455560684204102 }, { "dpo_loss": 0.693041980266571, "epoch": 2.295701464336325, "grad_norm": 27.109828632522476, "learning_rate": 1.565229931334277e-07, "logits": -1.2860682010650635, "logps": -79.39039611816406, "loss": 0.051, "objective": 0.04613161459565163, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5458333492279053, "regularize": 0.04613161459565163, "step": 810, "wo_beta": 13.837719917297363 }, { "dpo_loss": 0.6961421966552734, "epoch": 2.309872461029759, "grad_norm": 26.77232369418631, "learning_rate": 1.5056232853991208e-07, "logits": -1.2426903247833252, "logps": -80.33802032470703, "loss": 0.0483, "objective": 0.04774492606520653, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.4625000059604645, "regularize": 0.04774492606520653, "step": 815, "wo_beta": 15.377904891967773 }, { "dpo_loss": 0.6943568587303162, "epoch": 2.324043457723193, "grad_norm": 25.84415791966093, "learning_rate": 1.4469719671666043e-07, "logits": -1.1784952878952026, "logps": -79.52135467529297, "loss": 0.0497, "objective": 0.0464615561068058, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5041666626930237, "regularize": 0.0464615561068058, "step": 820, "wo_beta": 14.641592979431152 }, { "dpo_loss": 0.6941722631454468, "epoch": 2.3382144544166272, "grad_norm": 26.057445300358456, "learning_rate": 1.389292011321498e-07, "logits": -1.1956678628921509, "logps": -78.97592163085938, "loss": 0.0489, "objective": 0.04843177646398544, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5375000238418579, "regularize": 0.04843177646398544, "step": 825, "wo_beta": 15.882107734680176 }, { "dpo_loss": 0.6919335722923279, "epoch": 2.3523854511100613, "grad_norm": 25.587425832586177, "learning_rate": 1.3325991869878012e-07, "logits": -1.1966559886932373, "logps": -81.00519561767578, "loss": 0.0487, "objective": 0.05618049576878548, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.05618049576878548, "step": 830, "wo_beta": 15.746501922607422 }, { "dpo_loss": 0.6917215585708618, "epoch": 2.3665564478034957, "grad_norm": 25.756644403885232, "learning_rate": 1.2769089934176126e-07, "logits": -1.168601632118225, "logps": -80.84972381591797, "loss": 0.0488, "objective": 0.052498627454042435, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 0.052498627454042435, "step": 835, "wo_beta": 14.608040809631348 }, { "dpo_loss": 0.6898554563522339, "epoch": 2.3807274444969297, "grad_norm": 25.072094771225707, "learning_rate": 1.222236655753791e-07, "logits": -1.1249865293502808, "logps": -80.45842742919922, "loss": 0.0434, "objective": 0.04277409613132477, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4625000059604645, "regularize": 0.04277409613132477, "step": 840, "wo_beta": 16.011308670043945 }, { "dpo_loss": 0.6897058486938477, "epoch": 2.3948984411903638, "grad_norm": 31.2138593781791, "learning_rate": 1.1685971208675538e-07, "logits": -1.1826022863388062, "logps": -81.36385345458984, "loss": 0.0438, "objective": 0.04376084357500076, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5333333611488342, "regularize": 0.04376084357500076, "step": 845, "wo_beta": 15.694497108459473 }, { "dpo_loss": 0.689830482006073, "epoch": 2.409069437883798, "grad_norm": 26.424193566129606, "learning_rate": 1.1160050532721527e-07, "logits": -1.2078933715820312, "logps": -79.71755981445312, "loss": 0.0444, "objective": 0.04779530316591263, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.49166667461395264, "regularize": 0.04779530316591263, "step": 850, "wo_beta": 15.619561195373535 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.6980399489402771, "eval_logits": -1.2270959615707397, "eval_logps": -86.03622436523438, "eval_loss": 0.13948112726211548, "eval_objective": 0.1381867229938507, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.1381867229938507, "eval_runtime": 508.2715, "eval_samples_per_second": 11.392, "eval_steps_per_second": 0.95, "eval_wo_beta": 15.647224426269531, "step": 850 }, { "dpo_loss": 0.690664529800415, "epoch": 2.423240434577232, "grad_norm": 26.086004792829357, "learning_rate": 1.0644748311137375e-07, "logits": -1.2208842039108276, "logps": -79.23947143554688, "loss": 0.0431, "objective": 0.044093988835811615, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5249999761581421, "regularize": 0.044093988835811615, "step": 855, "wo_beta": 14.724575996398926 }, { "dpo_loss": 0.6878847479820251, "epoch": 2.4374114312706663, "grad_norm": 24.819758120044014, "learning_rate": 1.0140205422405212e-07, "logits": -1.172597050666809, "logps": -80.47863006591797, "loss": 0.0425, "objective": 0.044025711715221405, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5333333611488342, "regularize": 0.044025711715221405, "step": 860, "wo_beta": 15.323599815368652 }, { "dpo_loss": 0.6902381777763367, "epoch": 2.4515824279641003, "grad_norm": 27.313034441936136, "learning_rate": 9.646559803512993e-08, "logits": -1.2031606435775757, "logps": -79.59320831298828, "loss": 0.0444, "objective": 0.04272008314728737, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.04272008314728737, "step": 865, "wo_beta": 15.875487327575684 }, { "dpo_loss": 0.6910372376441956, "epoch": 2.4657534246575343, "grad_norm": 25.76666127477957, "learning_rate": 9.163946412243895e-08, "logits": -1.2454520463943481, "logps": -80.33094024658203, "loss": 0.0442, "objective": 0.04635915905237198, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.04635915905237198, "step": 870, "wo_beta": 14.93254566192627 }, { "dpo_loss": 0.6905195713043213, "epoch": 2.4799244213509684, "grad_norm": 25.65493367025704, "learning_rate": 8.692497190280224e-08, "logits": -1.193867802619934, "logps": -79.73404693603516, "loss": 0.044, "objective": 0.04675581306219101, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.5625, "regularize": 0.04675581306219101, "step": 875, "wo_beta": 16.489763259887695 }, { "dpo_loss": 0.6905779242515564, "epoch": 2.4940954180444024, "grad_norm": 26.621663140091542, "learning_rate": 8.232341027131883e-08, "logits": -1.1066038608551025, "logps": -79.80467224121094, "loss": 0.0446, "objective": 0.046583421528339386, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.046583421528339386, "step": 880, "wo_beta": 17.46852684020996 }, { "dpo_loss": 0.6917292475700378, "epoch": 2.5082664147378364, "grad_norm": 24.02209120686893, "learning_rate": 7.783603724899257e-08, "logits": -1.25592041015625, "logps": -79.1759262084961, "loss": 0.0422, "objective": 0.04294423386454582, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.04294423386454582, "step": 885, "wo_beta": 16.415306091308594 }, { "dpo_loss": 0.6880825161933899, "epoch": 2.5224374114312704, "grad_norm": 26.181840029139675, "learning_rate": 7.346407963880136e-08, "logits": -1.1791417598724365, "logps": -78.21730041503906, "loss": 0.0424, "objective": 0.03773224726319313, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.03773224726319313, "step": 890, "wo_beta": 13.494309425354004 }, { "dpo_loss": 0.692958414554596, "epoch": 2.536608408124705, "grad_norm": 27.615133075738825, "learning_rate": 6.92087326903022e-08, "logits": -1.175589680671692, "logps": -80.6869888305664, "loss": 0.0444, "objective": 0.0476791188120842, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.48750001192092896, "regularize": 0.0476791188120842, "step": 895, "wo_beta": 16.41474151611328 }, { "dpo_loss": 0.6935379505157471, "epoch": 2.550779404818139, "grad_norm": 25.263999580012257, "learning_rate": 6.507115977286143e-08, "logits": -1.1382538080215454, "logps": -79.20881652832031, "loss": 0.0438, "objective": 0.044265471398830414, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.044265471398830414, "step": 900, "wo_beta": 15.096195220947266 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6975382566452026, "eval_logits": -1.2295913696289062, "eval_logps": -85.88396453857422, "eval_loss": 0.13868437707424164, "eval_objective": 0.13740767538547516, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.13740767538547516, "eval_runtime": 525.8368, "eval_samples_per_second": 11.011, "eval_steps_per_second": 0.919, "eval_wo_beta": 15.634546279907227, "step": 900 }, { "dpo_loss": 0.6917089819908142, "epoch": 2.564950401511573, "grad_norm": 25.44195334625603, "learning_rate": 6.105249205760127e-08, "logits": -1.2037063837051392, "logps": -79.04875183105469, "loss": 0.0411, "objective": 0.03601410239934921, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.03601410239934921, "step": 905, "wo_beta": 14.861380577087402 }, { "dpo_loss": 0.6932801008224487, "epoch": 2.579121398205007, "grad_norm": 26.495925146665332, "learning_rate": 5.7153828208148846e-08, "logits": -1.1827551126480103, "logps": -81.922607421875, "loss": 0.0424, "objective": 0.04883956164121628, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.04883956164121628, "step": 910, "wo_beta": 15.852696418762207 }, { "dpo_loss": 0.6898232102394104, "epoch": 2.593292394898441, "grad_norm": 25.88822340642525, "learning_rate": 5.337623408027292e-08, "logits": -1.2935634851455688, "logps": -80.87789916992188, "loss": 0.0403, "objective": 0.040093984454870224, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.040093984454870224, "step": 915, "wo_beta": 14.905534744262695 }, { "dpo_loss": 0.6920287013053894, "epoch": 2.6074633915918755, "grad_norm": 25.364010577767672, "learning_rate": 4.972074243048896e-08, "logits": -1.1468993425369263, "logps": -79.89569854736328, "loss": 0.0396, "objective": 0.03967604413628578, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.03967604413628578, "step": 920, "wo_beta": 15.246692657470703 }, { "dpo_loss": 0.6928901076316833, "epoch": 2.6216343882853095, "grad_norm": 27.967184575096596, "learning_rate": 4.6188352633713956e-08, "logits": -1.1743673086166382, "logps": -80.17101287841797, "loss": 0.0417, "objective": 0.04370425269007683, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.47083333134651184, "regularize": 0.04370425269007683, "step": 925, "wo_beta": 16.336292266845703 }, { "dpo_loss": 0.688522458076477, "epoch": 2.6358053849787435, "grad_norm": 26.578359144982873, "learning_rate": 4.2780030410047796e-08, "logits": -1.1617387533187866, "logps": -79.97476196289062, "loss": 0.0365, "objective": 0.03662450239062309, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.44583332538604736, "ranking_simple": 0.44583332538604736, "regularize": 0.03662450239062309, "step": 930, "wo_beta": 16.801166534423828 }, { "dpo_loss": 0.6928302645683289, "epoch": 2.6499763816721775, "grad_norm": 26.6756558913633, "learning_rate": 3.949670756075446e-08, "logits": -1.1548212766647339, "logps": -78.78431701660156, "loss": 0.0364, "objective": 0.0356716513633728, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.0356716513633728, "step": 935, "wo_beta": 15.733369827270508 }, { "dpo_loss": 0.6884638071060181, "epoch": 2.6641473783656116, "grad_norm": 26.11837122854028, "learning_rate": 3.63392817135173e-08, "logits": -1.213140845298767, "logps": -81.39899444580078, "loss": 0.0357, "objective": 0.03838236257433891, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.03838236257433891, "step": 940, "wo_beta": 16.71453094482422 }, { "dpo_loss": 0.6904810070991516, "epoch": 2.678318375059046, "grad_norm": 26.48243005501328, "learning_rate": 3.330861607703611e-08, "logits": -1.2477443218231201, "logps": -80.07948303222656, "loss": 0.0369, "objective": 0.03517834097146988, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.03517834097146988, "step": 945, "wo_beta": 15.665254592895508 }, { "dpo_loss": 0.6894643902778625, "epoch": 2.69248937175248, "grad_norm": 26.269248260275482, "learning_rate": 3.040553920503502e-08, "logits": -1.1376032829284668, "logps": -80.89375305175781, "loss": 0.0384, "objective": 0.03873926401138306, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 0.03873926401138306, "step": 950, "wo_beta": 14.65186882019043 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.6974536180496216, "eval_logits": -1.2285144329071045, "eval_logps": -85.95899963378906, "eval_loss": 0.13796193897724152, "eval_objective": 0.13680347800254822, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.13680347800254822, "eval_runtime": 502.396, "eval_samples_per_second": 11.525, "eval_steps_per_second": 0.961, "eval_wo_beta": 15.642508506774902, "step": 950 }, { "dpo_loss": 0.6890572905540466, "epoch": 2.706660368445914, "grad_norm": 24.74397275822761, "learning_rate": 2.7630844769743756e-08, "logits": -1.2225416898727417, "logps": -79.87822723388672, "loss": 0.0403, "objective": 0.04285174608230591, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.04285174608230591, "step": 955, "wo_beta": 13.80820369720459 }, { "dpo_loss": 0.6908868551254272, "epoch": 2.720831365139348, "grad_norm": 25.907101929875015, "learning_rate": 2.4985291344915673e-08, "logits": -1.1964094638824463, "logps": -79.958740234375, "loss": 0.0384, "objective": 0.03498096391558647, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5041666626930237, "regularize": 0.03498096391558647, "step": 960, "wo_beta": 16.096843719482422 }, { "dpo_loss": 0.6898122429847717, "epoch": 2.735002361832782, "grad_norm": 26.015895295989438, "learning_rate": 2.2469602198441573e-08, "logits": -1.2220391035079956, "logps": -80.10702514648438, "loss": 0.0368, "objective": 0.03775167092680931, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5666666626930237, "regularize": 0.03775167092680931, "step": 965, "wo_beta": 14.61376953125 }, { "dpo_loss": 0.6917709112167358, "epoch": 2.7491733585262166, "grad_norm": 24.33103792831753, "learning_rate": 2.008446509461498e-08, "logits": -1.2293510437011719, "logps": -81.0619888305664, "loss": 0.0341, "objective": 0.03296136483550072, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5208333134651184, "regularize": 0.03296136483550072, "step": 970, "wo_beta": 14.957200050354004 }, { "dpo_loss": 0.6909447908401489, "epoch": 2.7633443552196506, "grad_norm": 24.892680282575437, "learning_rate": 1.7830532106104746e-08, "logits": -1.1391520500183105, "logps": -79.50247955322266, "loss": 0.0358, "objective": 0.03571467101573944, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5416666865348816, "regularize": 0.03571467101573944, "step": 975, "wo_beta": 15.747049331665039 }, { "dpo_loss": 0.6906387209892273, "epoch": 2.7775153519130846, "grad_norm": 25.891776024282194, "learning_rate": 1.570841943568446e-08, "logits": -1.2599250078201294, "logps": -78.82478332519531, "loss": 0.0365, "objective": 0.03682435303926468, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.49166667461395264, "regularize": 0.03682435303926468, "step": 980, "wo_beta": 14.397340774536133 }, { "dpo_loss": 0.6933729648590088, "epoch": 2.7916863486065187, "grad_norm": 24.71596998222205, "learning_rate": 1.3718707247769134e-08, "logits": -1.1248877048492432, "logps": -77.72516632080078, "loss": 0.038, "objective": 0.03822270780801773, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.03822270780801773, "step": 985, "wo_beta": 14.327728271484375 }, { "dpo_loss": 0.691889762878418, "epoch": 2.8058573452999527, "grad_norm": 26.185929406261582, "learning_rate": 1.1861939509803686e-08, "logits": -1.1771855354309082, "logps": -81.14643859863281, "loss": 0.0369, "objective": 0.036898624151945114, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.47083333134651184, "regularize": 0.036898624151945114, "step": 990, "wo_beta": 15.375889778137207 }, { "dpo_loss": 0.6891864538192749, "epoch": 2.820028341993387, "grad_norm": 24.803225677825235, "learning_rate": 1.0138623843548078e-08, "logits": -1.2396986484527588, "logps": -79.1412353515625, "loss": 0.0365, "objective": 0.04024568572640419, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5458333492279053, "regularize": 0.04024568572640419, "step": 995, "wo_beta": 16.440141677856445 }, { "dpo_loss": 0.6907335519790649, "epoch": 2.8341993386868207, "grad_norm": 24.80804716491088, "learning_rate": 8.54923138629815e-09, "logits": -1.1814649105072021, "logps": -78.3318862915039, "loss": 0.0375, "objective": 0.03398551046848297, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5208333134651184, "regularize": 0.03398551046848297, "step": 1000, "wo_beta": 14.515811920166016 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6973779201507568, "eval_logits": -1.2304595708847046, "eval_logps": -85.99760437011719, "eval_loss": 0.1379886120557785, "eval_objective": 0.1368565410375595, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.1368565410375595, "eval_runtime": 504.9482, "eval_samples_per_second": 11.467, "eval_steps_per_second": 0.957, "eval_wo_beta": 15.63548755645752, "step": 1000 }, { "dpo_loss": 0.6911761164665222, "epoch": 2.848370335380255, "grad_norm": 27.32667601221845, "learning_rate": 7.09419666208183e-09, "logits": -1.1803662776947021, "logps": -78.7650375366211, "loss": 0.036, "objective": 0.03725501522421837, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5208333134651184, "regularize": 0.03725501522421837, "step": 1005, "wo_beta": 15.2937593460083 }, { "dpo_loss": 0.6888595223426819, "epoch": 2.862541332073689, "grad_norm": 26.14400831689978, "learning_rate": 5.773917462864264e-09, "logits": -1.2407745122909546, "logps": -79.07453918457031, "loss": 0.0359, "objective": 0.03689141198992729, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 0.03689141198992729, "step": 1010, "wo_beta": 15.180621147155762 }, { "dpo_loss": 0.6912004947662354, "epoch": 2.8767123287671232, "grad_norm": 24.9602315307722, "learning_rate": 4.588754739795586e-09, "logits": -1.1721571683883667, "logps": -78.31599426269531, "loss": 0.0354, "objective": 0.03823023661971092, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.03823023661971092, "step": 1015, "wo_beta": 14.313817977905273 }, { "dpo_loss": 0.6896302700042725, "epoch": 2.8908833254605573, "grad_norm": 24.85258883289883, "learning_rate": 3.53903250453047e-09, "logits": -1.1410295963287354, "logps": -80.05741882324219, "loss": 0.0343, "objective": 0.03470051661133766, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.03470051661133766, "step": 1020, "wo_beta": 17.722339630126953 }, { "dpo_loss": 0.6912213563919067, "epoch": 2.9050543221539913, "grad_norm": 25.437671735836517, "learning_rate": 2.6250377406467627e-09, "logits": -1.2291027307510376, "logps": -80.00859832763672, "loss": 0.0379, "objective": 0.037315838038921356, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5041666626930237, "regularize": 0.037315838038921356, "step": 1025, "wo_beta": 14.656061172485352 }, { "dpo_loss": 0.6911433935165405, "epoch": 2.9192253188474258, "grad_norm": 24.681518212372314, "learning_rate": 1.8470203251865768e-09, "logits": -1.2523103952407837, "logps": -80.20305633544922, "loss": 0.035, "objective": 0.03597547858953476, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4791666567325592, "regularize": 0.03597547858953476, "step": 1030, "wo_beta": 16.243247985839844 }, { "dpo_loss": 0.6904833316802979, "epoch": 2.9333963155408598, "grad_norm": 26.808499612926756, "learning_rate": 1.2051929603428823e-09, "logits": -1.2276477813720703, "logps": -80.6124496459961, "loss": 0.0344, "objective": 0.03077917918562889, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.03077917918562889, "step": 1035, "wo_beta": 14.297567367553711 }, { "dpo_loss": 0.6902684569358826, "epoch": 2.947567312234294, "grad_norm": 24.71043561481991, "learning_rate": 6.997311153086882e-10, "logits": -1.227773904800415, "logps": -80.38175201416016, "loss": 0.0364, "objective": 0.036134228110313416, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 0.036134228110313416, "step": 1040, "wo_beta": 16.110403060913086 }, { "dpo_loss": 0.6894943118095398, "epoch": 2.961738308927728, "grad_norm": 26.305013618654215, "learning_rate": 3.3077297830541585e-10, "logits": -1.1821495294570923, "logps": -81.93363189697266, "loss": 0.0371, "objective": 0.04041092470288277, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5541666746139526, "regularize": 0.04041092470288277, "step": 1045, "wo_beta": 17.30424690246582 }, { "dpo_loss": 0.6926708221435547, "epoch": 2.975909305621162, "grad_norm": 27.660126015515125, "learning_rate": 9.841941880361914e-11, "logits": -1.2283350229263306, "logps": -78.42631530761719, "loss": 0.0397, "objective": 0.03637199103832245, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.550000011920929, "regularize": 0.03637199103832245, "step": 1050, "wo_beta": 14.132574081420898 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.697369396686554, "eval_logits": -1.230570673942566, "eval_logps": -85.98023223876953, "eval_loss": 0.13814175128936768, "eval_objective": 0.13700547814369202, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.13700547814369202, "eval_runtime": 530.5394, "eval_samples_per_second": 10.913, "eval_steps_per_second": 0.91, "eval_wo_beta": 15.63470458984375, "step": 1050 }, { "dpo_loss": 0.689972996711731, "epoch": 2.9900803023145963, "grad_norm": 25.71242634224602, "learning_rate": 2.7339599464326622e-12, "logits": -1.2016465663909912, "logps": -79.08844757080078, "loss": 0.0389, "objective": 0.03705615550279617, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5541666746139526, "regularize": 0.03705615550279617, "step": 1055, "wo_beta": 14.549761772155762 }, { "epoch": 2.992914501653283, "step": 1056, "total_flos": 0.0, "train_loss": 0.08480868444806247, "train_runtime": 47353.1169, "train_samples_per_second": 3.218, "train_steps_per_second": 0.022 } ], "logging_steps": 5, "max_steps": 1056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }