diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3791 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.992914501653283, + "eval_steps": 50, + "global_step": 1056, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.002834199338686821, + "grad_norm": 36.794102305076855, + "learning_rate": 9.433962264150943e-09, + "logits": -1.2867579460144043, + "logps": -84.34933471679688, + "loss": 0.0051, + "objective": 0.0046141319908201694, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.0046141319908201694, + "step": 1, + "wo_beta": 14.841486930847168 + }, + { + "dpo_loss": 0.6930367350578308, + "epoch": 0.014170996693434105, + "grad_norm": 51.56528279298989, + "learning_rate": 4.7169811320754715e-08, + "logits": -1.4291929006576538, + "logps": -83.85256958007812, + "loss": 0.0058, + "objective": 0.005918528418987989, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4895833432674408, + "ranking_simple": 0.4895833432674408, + "regularize": 0.005918528418987989, + "step": 5, + "wo_beta": 16.667278289794922 + }, + { + "dpo_loss": 0.6930564641952515, + "epoch": 0.02834199338686821, + "grad_norm": 43.62540826850091, + "learning_rate": 9.433962264150943e-08, + "logits": -1.4014313220977783, + "logps": -84.90540313720703, + "loss": 0.0065, + "objective": 0.00607979716733098, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.5791666507720947, + "regularize": 0.00607979716733098, + "step": 10, + "wo_beta": 15.295255661010742 + }, + { + "dpo_loss": 0.691772997379303, + "epoch": 0.042512990080302314, + "grad_norm": 40.579476886356176, + "learning_rate": 1.4150943396226414e-07, + "logits": -1.5395350456237793, + "logps": -84.67674255371094, + "loss": 0.0077, + "objective": 0.007744006346911192, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.5666666626930237, + "regularize": 0.007744006346911192, + "step": 15, + "wo_beta": 15.72358512878418 + }, + { + "dpo_loss": 0.6908682584762573, + "epoch": 0.05668398677373642, + "grad_norm": 38.45055261776428, + "learning_rate": 1.8867924528301886e-07, + "logits": -1.3619084358215332, + "logps": -83.87267303466797, + "loss": 0.0106, + "objective": 0.011018705554306507, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4833333194255829, + "regularize": 0.011018705554306507, + "step": 20, + "wo_beta": 16.501863479614258 + }, + { + "dpo_loss": 0.6917246580123901, + "epoch": 0.07085498346717052, + "grad_norm": 37.49075261903623, + "learning_rate": 2.3584905660377358e-07, + "logits": -1.366659164428711, + "logps": -84.04557037353516, + "loss": 0.0144, + "objective": 0.012653553858399391, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5625, + "regularize": 0.012653553858399391, + "step": 25, + "wo_beta": 15.649717330932617 + }, + { + "dpo_loss": 0.6906312704086304, + "epoch": 0.08502598016060463, + "grad_norm": 35.42831042318107, + "learning_rate": 2.830188679245283e-07, + "logits": -1.4202715158462524, + "logps": -84.00289154052734, + "loss": 0.0156, + "objective": 0.015595527365803719, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4833333194255829, + "regularize": 0.015595527365803719, + "step": 30, + "wo_beta": 16.955543518066406 + }, + { + "dpo_loss": 0.6931964755058289, + "epoch": 0.09919697685403873, + "grad_norm": 37.327321600930496, + "learning_rate": 3.30188679245283e-07, + "logits": -1.3935037851333618, + "logps": -83.39187622070312, + "loss": 0.0202, + "objective": 0.021191226318478584, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.021191226318478584, + "step": 35, + "wo_beta": 16.169347763061523 + }, + { + "dpo_loss": 0.693729817867279, + "epoch": 0.11336797354747284, + "grad_norm": 41.6880498675233, + "learning_rate": 3.773584905660377e-07, + "logits": -1.381697177886963, + "logps": -83.91118621826172, + "loss": 0.0228, + "objective": 0.02042653225362301, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5166666507720947, + "regularize": 0.02042653225362301, + "step": 40, + "wo_beta": 14.309080123901367 + }, + { + "dpo_loss": 0.6919765472412109, + "epoch": 0.12753897024090693, + "grad_norm": 41.11048762433909, + "learning_rate": 4.2452830188679244e-07, + "logits": -1.3955552577972412, + "logps": -84.25520324707031, + "loss": 0.027, + "objective": 0.025382202118635178, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 0.025382202118635178, + "step": 45, + "wo_beta": 14.21595287322998 + }, + { + "dpo_loss": 0.6911224722862244, + "epoch": 0.14170996693434104, + "grad_norm": 41.07625280062658, + "learning_rate": 4.7169811320754717e-07, + "logits": -1.4127604961395264, + "logps": -85.3918685913086, + "loss": 0.0351, + "objective": 0.03202561289072037, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.03202561289072037, + "step": 50, + "wo_beta": 15.589811325073242 + }, + { + "epoch": 0.14170996693434104, + "eval_dpo_loss": 0.6926834583282471, + "eval_logits": -1.391736626625061, + "eval_logps": -91.23294067382812, + "eval_loss": 0.02213538996875286, + "eval_objective": 0.022384027019143105, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5212215185165405, + "eval_regularize": 0.022384027019143105, + "eval_runtime": 470.1076, + "eval_samples_per_second": 12.316, + "eval_steps_per_second": 1.027, + "eval_wo_beta": 16.221710205078125, + "step": 50 + }, + { + "dpo_loss": 0.6922997832298279, + "epoch": 0.15588096362777515, + "grad_norm": 36.466581476765526, + "learning_rate": 5.188679245283019e-07, + "logits": -1.3620656728744507, + "logps": -84.91451263427734, + "loss": 0.0367, + "objective": 0.0405682697892189, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.4833333194255829, + "regularize": 0.0405682697892189, + "step": 55, + "wo_beta": 15.095004081726074 + }, + { + "dpo_loss": 0.6875351071357727, + "epoch": 0.17005196032120926, + "grad_norm": 36.25782748515131, + "learning_rate": 5.660377358490566e-07, + "logits": -1.28928804397583, + "logps": -85.71366119384766, + "loss": 0.0403, + "objective": 0.04035286232829094, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5208333134651184, + "regularize": 0.04035286232829094, + "step": 60, + "wo_beta": 14.607115745544434 + }, + { + "dpo_loss": 0.6947705149650574, + "epoch": 0.18422295701464336, + "grad_norm": 41.25867915272223, + "learning_rate": 6.132075471698112e-07, + "logits": -1.3798266649246216, + "logps": -83.1692123413086, + "loss": 0.0491, + "objective": 0.050007414072752, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.050007414072752, + "step": 65, + "wo_beta": 14.976885795593262 + }, + { + "dpo_loss": 0.6880966424942017, + "epoch": 0.19839395370807747, + "grad_norm": 35.20333705483616, + "learning_rate": 6.60377358490566e-07, + "logits": -1.4017753601074219, + "logps": -85.73289489746094, + "loss": 0.0551, + "objective": 0.059768859297037125, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5249999761581421, + "regularize": 0.059768859297037125, + "step": 70, + "wo_beta": 15.204180717468262 + }, + { + "dpo_loss": 0.6949416995048523, + "epoch": 0.21256495040151158, + "grad_norm": 35.61853042350494, + "learning_rate": 7.075471698113207e-07, + "logits": -1.321311593055725, + "logps": -85.34779357910156, + "loss": 0.0579, + "objective": 0.06061805784702301, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5291666388511658, + "regularize": 0.06061805784702301, + "step": 75, + "wo_beta": 14.980683326721191 + }, + { + "dpo_loss": 0.6930631995201111, + "epoch": 0.22673594709494568, + "grad_norm": 34.9536345678453, + "learning_rate": 7.547169811320754e-07, + "logits": -1.4264112710952759, + "logps": -84.01344299316406, + "loss": 0.0626, + "objective": 0.062408361583948135, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5333333611488342, + "regularize": 0.062408361583948135, + "step": 80, + "wo_beta": 16.357084274291992 + }, + { + "dpo_loss": 0.6939026117324829, + "epoch": 0.2409069437883798, + "grad_norm": 35.4653089608865, + "learning_rate": 8.018867924528302e-07, + "logits": -1.4041804075241089, + "logps": -83.52224731445312, + "loss": 0.0695, + "objective": 0.07861108332872391, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5416666865348816, + "regularize": 0.07861108332872391, + "step": 85, + "wo_beta": 14.987756729125977 + }, + { + "dpo_loss": 0.6886675357818604, + "epoch": 0.25507794048181387, + "grad_norm": 38.910010820592774, + "learning_rate": 8.490566037735849e-07, + "logits": -1.5007805824279785, + "logps": -84.52466583251953, + "loss": 0.0806, + "objective": 0.08859896659851074, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5208333134651184, + "regularize": 0.08859896659851074, + "step": 90, + "wo_beta": 15.482732772827148 + }, + { + "dpo_loss": 0.6961393356323242, + "epoch": 0.269248937175248, + "grad_norm": 38.50762322649532, + "learning_rate": 8.962264150943396e-07, + "logits": -1.4152452945709229, + "logps": -83.7827377319336, + "loss": 0.0851, + "objective": 0.08412078768014908, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.47083333134651184, + "regularize": 0.08412078768014908, + "step": 95, + "wo_beta": 16.229019165039062 + }, + { + "dpo_loss": 0.6928918361663818, + "epoch": 0.2834199338686821, + "grad_norm": 34.07886171444254, + "learning_rate": 9.433962264150943e-07, + "logits": -1.2942625284194946, + "logps": -81.22164916992188, + "loss": 0.0877, + "objective": 0.08352937549352646, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4833333194255829, + "regularize": 0.08352937549352646, + "step": 100, + "wo_beta": 15.187151908874512 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.6921994090080261, + "eval_logits": -1.3862521648406982, + "eval_logps": -88.66019439697266, + "eval_loss": 0.04334083944559097, + "eval_objective": 0.04473063722252846, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.04473063722252846, + "eval_runtime": 472.2375, + "eval_samples_per_second": 12.261, + "eval_steps_per_second": 1.023, + "eval_wo_beta": 16.16818618774414, + "step": 100 + }, + { + "dpo_loss": 0.6989858150482178, + "epoch": 0.2975909305621162, + "grad_norm": 34.124768906394316, + "learning_rate": 9.90566037735849e-07, + "logits": -1.4883809089660645, + "logps": -83.63202667236328, + "loss": 0.0937, + "objective": 0.10326550155878067, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 0.10326550155878067, + "step": 105, + "wo_beta": 14.697186470031738 + }, + { + "dpo_loss": 0.6916998624801636, + "epoch": 0.3117619272555503, + "grad_norm": 36.53960499520599, + "learning_rate": 9.99956257238817e-07, + "logits": -1.3666936159133911, + "logps": -82.67723083496094, + "loss": 0.1009, + "objective": 0.09831760078668594, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.09831760078668594, + "step": 110, + "wo_beta": 14.75289249420166 + }, + { + "dpo_loss": 0.6972029805183411, + "epoch": 0.32593292394898443, + "grad_norm": 32.3431868996238, + "learning_rate": 9.997785653888834e-07, + "logits": -1.351915955543518, + "logps": -82.5732650756836, + "loss": 0.1062, + "objective": 0.10171337425708771, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5083333253860474, + "regularize": 0.10171337425708771, + "step": 115, + "wo_beta": 16.003950119018555 + }, + { + "dpo_loss": 0.6885399222373962, + "epoch": 0.3401039206424185, + "grad_norm": 35.92878266852989, + "learning_rate": 9.994642390694308e-07, + "logits": -1.367909550666809, + "logps": -82.90719604492188, + "loss": 0.1098, + "objective": 0.11067435145378113, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.11067435145378113, + "step": 120, + "wo_beta": 15.639138221740723 + }, + { + "dpo_loss": 0.6936843395233154, + "epoch": 0.35427491733585265, + "grad_norm": 30.26276247254467, + "learning_rate": 9.990133642141357e-07, + "logits": -1.3929860591888428, + "logps": -85.65290069580078, + "loss": 0.1056, + "objective": 0.11743973940610886, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4749999940395355, + "regularize": 0.11743973940610886, + "step": 125, + "wo_beta": 15.93514633178711 + }, + { + "dpo_loss": 0.6941003799438477, + "epoch": 0.3684459140292867, + "grad_norm": 39.21461417787312, + "learning_rate": 9.98426064087682e-07, + "logits": -1.3525993824005127, + "logps": -83.56419372558594, + "loss": 0.1211, + "objective": 0.11899420619010925, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.5833333134651184, + "regularize": 0.11899420619010925, + "step": 130, + "wo_beta": 16.0157527923584 + }, + { + "dpo_loss": 0.6882577538490295, + "epoch": 0.3826169107227208, + "grad_norm": 32.67768184928008, + "learning_rate": 9.977024992520601e-07, + "logits": -1.3901729583740234, + "logps": -84.39146423339844, + "loss": 0.1253, + "objective": 0.12414517998695374, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.12414517998695374, + "step": 135, + "wo_beta": 14.371219635009766 + }, + { + "dpo_loss": 0.6830641627311707, + "epoch": 0.39678790741615494, + "grad_norm": 33.07732649314307, + "learning_rate": 9.968428675226713e-07, + "logits": -1.3437649011611938, + "logps": -85.44697570800781, + "loss": 0.1248, + "objective": 0.12058641016483307, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5375000238418579, + "regularize": 0.12058641016483307, + "step": 140, + "wo_beta": 14.547070503234863 + }, + { + "dpo_loss": 0.6851420998573303, + "epoch": 0.410958904109589, + "grad_norm": 30.784646211601874, + "learning_rate": 9.958474039142469e-07, + "logits": -1.3567951917648315, + "logps": -86.4469223022461, + "loss": 0.1319, + "objective": 0.13056445121765137, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5625, + "regularize": 0.13056445121765137, + "step": 145, + "wo_beta": 13.91884994506836 + }, + { + "dpo_loss": 0.6960374116897583, + "epoch": 0.42512990080302315, + "grad_norm": 32.05337681597037, + "learning_rate": 9.947163805765979e-07, + "logits": -1.3565360307693481, + "logps": -86.30919647216797, + "loss": 0.1323, + "objective": 0.12925057113170624, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.5375000238418579, + "regularize": 0.12925057113170624, + "step": 150, + "wo_beta": 16.796695709228516 + }, + { + "epoch": 0.42512990080302315, + "eval_dpo_loss": 0.695567786693573, + "eval_logits": -1.3053797483444214, + "eval_logps": -90.43773651123047, + "eval_loss": 0.07677316665649414, + "eval_objective": 0.07639209181070328, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.07639209181070328, + "eval_runtime": 526.1958, + "eval_samples_per_second": 11.004, + "eval_steps_per_second": 0.918, + "eval_wo_beta": 16.003387451171875, + "step": 150 + }, + { + "dpo_loss": 0.6933045983314514, + "epoch": 0.43930089749645723, + "grad_norm": 31.605620123374155, + "learning_rate": 9.934501067202117e-07, + "logits": -1.3933676481246948, + "logps": -83.03238677978516, + "loss": 0.1358, + "objective": 0.1285211592912674, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4791666567325592, + "regularize": 0.1285211592912674, + "step": 155, + "wo_beta": 15.31113338470459 + }, + { + "dpo_loss": 0.6946766972541809, + "epoch": 0.45347189418989137, + "grad_norm": 32.22880904067845, + "learning_rate": 9.92048928531717e-07, + "logits": -1.2931861877441406, + "logps": -83.0308837890625, + "loss": 0.1338, + "objective": 0.12377996742725372, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5416666865348816, + "regularize": 0.12377996742725372, + "step": 160, + "wo_beta": 14.51412296295166 + }, + { + "dpo_loss": 0.6858457326889038, + "epoch": 0.46764289088332545, + "grad_norm": 28.56289647538006, + "learning_rate": 9.905132290792392e-07, + "logits": -1.3845534324645996, + "logps": -84.35334777832031, + "loss": 0.1295, + "objective": 0.13048619031906128, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.13048619031906128, + "step": 165, + "wo_beta": 15.858311653137207 + }, + { + "dpo_loss": 0.6987485289573669, + "epoch": 0.4818138875767596, + "grad_norm": 31.697158183348822, + "learning_rate": 9.888434282076757e-07, + "logits": -1.3974741697311401, + "logps": -82.40156555175781, + "loss": 0.1376, + "objective": 0.14300216734409332, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 0.14300216734409332, + "step": 170, + "wo_beta": 15.730175018310547 + }, + { + "dpo_loss": 0.6993536353111267, + "epoch": 0.49598488427019366, + "grad_norm": 30.951333756278135, + "learning_rate": 9.870399824239114e-07, + "logits": -1.2470077276229858, + "logps": -83.35051727294922, + "loss": 0.1401, + "objective": 0.13475559651851654, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 0.13475559651851654, + "step": 175, + "wo_beta": 17.82953643798828 + }, + { + "dpo_loss": 0.6983634233474731, + "epoch": 0.5101558809636277, + "grad_norm": 34.822921079044, + "learning_rate": 9.851033847720164e-07, + "logits": -1.2282413244247437, + "logps": -83.51294708251953, + "loss": 0.1442, + "objective": 0.143393412232399, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4625000059604645, + "regularize": 0.143393412232399, + "step": 180, + "wo_beta": 14.920221328735352 + }, + { + "dpo_loss": 0.6972795128822327, + "epoch": 0.5243268776570619, + "grad_norm": 34.3447207787113, + "learning_rate": 9.83034164698452e-07, + "logits": -1.2574915885925293, + "logps": -82.5478515625, + "loss": 0.1382, + "objective": 0.14230893552303314, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4749999940395355, + "regularize": 0.14230893552303314, + "step": 185, + "wo_beta": 14.194059371948242 + }, + { + "dpo_loss": 0.6978750824928284, + "epoch": 0.538497874350496, + "grad_norm": 34.00712851830173, + "learning_rate": 9.808328879073251e-07, + "logits": -1.2612725496292114, + "logps": -81.91997528076172, + "loss": 0.1466, + "objective": 0.14948724210262299, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5666666626930237, + "regularize": 0.14948724210262299, + "step": 190, + "wo_beta": 16.620363235473633 + }, + { + "dpo_loss": 0.6822370290756226, + "epoch": 0.5526688710439301, + "grad_norm": 31.586658287520144, + "learning_rate": 9.78500156205731e-07, + "logits": -1.2822577953338623, + "logps": -83.0813217163086, + "loss": 0.1319, + "objective": 0.13207347691059113, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5333333611488342, + "regularize": 0.13207347691059113, + "step": 195, + "wo_beta": 14.693647384643555 + }, + { + "dpo_loss": 0.7044106721878052, + "epoch": 0.5668398677373642, + "grad_norm": 30.369620708498754, + "learning_rate": 9.760366073392244e-07, + "logits": -1.3258157968521118, + "logps": -83.32820129394531, + "loss": 0.1427, + "objective": 0.15046708285808563, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5249999761581421, + "regularize": 0.15046708285808563, + "step": 200, + "wo_beta": 15.960111618041992 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.6959174871444702, + "eval_logits": -1.3123745918273926, + "eval_logps": -88.34333801269531, + "eval_loss": 0.10319730639457703, + "eval_objective": 0.10169863700866699, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.10169863700866699, + "eval_runtime": 532.3008, + "eval_samples_per_second": 10.877, + "eval_steps_per_second": 0.907, + "eval_wo_beta": 15.992826461791992, + "step": 200 + }, + { + "dpo_loss": 0.7000283598899841, + "epoch": 0.5810108644307983, + "grad_norm": 30.076737378719095, + "learning_rate": 9.734429148174674e-07, + "logits": -1.2141386270523071, + "logps": -82.74073028564453, + "loss": 0.1484, + "objective": 0.1470470279455185, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.49166667461395264, + "regularize": 0.1470470279455185, + "step": 205, + "wo_beta": 16.118446350097656 + }, + { + "dpo_loss": 0.6862087249755859, + "epoch": 0.5951818611242324, + "grad_norm": 31.36222267459615, + "learning_rate": 9.707197877300973e-07, + "logits": -1.2483521699905396, + "logps": -82.3885269165039, + "loss": 0.1454, + "objective": 0.14993111789226532, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5291666388511658, + "regularize": 0.14993111789226532, + "step": 210, + "wo_beta": 15.07961654663086 + }, + { + "dpo_loss": 0.6946883797645569, + "epoch": 0.6093528578176665, + "grad_norm": 32.25125352651472, + "learning_rate": 9.678679705528698e-07, + "logits": -1.3168671131134033, + "logps": -82.3456039428711, + "loss": 0.1384, + "objective": 0.14188070595264435, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4791666567325592, + "regularize": 0.14188070595264435, + "step": 215, + "wo_beta": 16.104469299316406 + }, + { + "dpo_loss": 0.7026723027229309, + "epoch": 0.6235238545111006, + "grad_norm": 30.142053540661294, + "learning_rate": 9.648882429441256e-07, + "logits": -1.3188337087631226, + "logps": -82.63532257080078, + "loss": 0.1477, + "objective": 0.1607874184846878, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.49166667461395264, + "regularize": 0.1607874184846878, + "step": 220, + "wo_beta": 17.079347610473633 + }, + { + "dpo_loss": 0.6998167634010315, + "epoch": 0.6376948512045347, + "grad_norm": 29.418648888160003, + "learning_rate": 9.61781419531641e-07, + "logits": -1.3314566612243652, + "logps": -82.72489929199219, + "loss": 0.1465, + "objective": 0.14282181859016418, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.4958333373069763, + "regularize": 0.14282181859016418, + "step": 225, + "wo_beta": 15.506386756896973 + }, + { + "dpo_loss": 0.7007436156272888, + "epoch": 0.6518658478979689, + "grad_norm": 31.584769522955447, + "learning_rate": 9.585483496899149e-07, + "logits": -1.2612279653549194, + "logps": -82.21707916259766, + "loss": 0.1434, + "objective": 0.14342841506004333, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5583333373069763, + "regularize": 0.14342841506004333, + "step": 230, + "wo_beta": 16.431724548339844 + }, + { + "dpo_loss": 0.7085835337638855, + "epoch": 0.6660368445914029, + "grad_norm": 33.138665174716316, + "learning_rate": 9.551899173079606e-07, + "logits": -1.2083913087844849, + "logps": -84.15171813964844, + "loss": 0.1479, + "objective": 0.14772751927375793, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5291666388511658, + "regularize": 0.14772751927375793, + "step": 235, + "wo_beta": 15.722906112670898 + }, + { + "dpo_loss": 0.6893501877784729, + "epoch": 0.680207841284837, + "grad_norm": 28.511782322472136, + "learning_rate": 9.517070405476574e-07, + "logits": -1.3556396961212158, + "logps": -83.491943359375, + "loss": 0.1408, + "objective": 0.1575685441493988, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1575685441493988, + "step": 240, + "wo_beta": 15.692626953125 + }, + { + "dpo_loss": 0.6901037693023682, + "epoch": 0.6943788379782712, + "grad_norm": 28.887977273452503, + "learning_rate": 9.481006715927351e-07, + "logits": -1.3499360084533691, + "logps": -82.59223937988281, + "loss": 0.1422, + "objective": 0.1397981345653534, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5416666865348816, + "regularize": 0.1397981345653534, + "step": 245, + "wo_beta": 15.627848625183105 + }, + { + "dpo_loss": 0.6898453831672668, + "epoch": 0.7085498346717053, + "grad_norm": 30.778123472149638, + "learning_rate": 9.443717963884568e-07, + "logits": -1.1249743700027466, + "logps": -81.38602447509766, + "loss": 0.1451, + "objective": 0.12806275486946106, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 0.12806275486946106, + "step": 250, + "wo_beta": 14.860217094421387 + }, + { + "epoch": 0.7085498346717053, + "eval_dpo_loss": 0.6950441002845764, + "eval_logits": -1.2854480743408203, + "eval_logps": -88.06980895996094, + "eval_loss": 0.11781599372625351, + "eval_objective": 0.11854107677936554, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5274327397346497, + "eval_regularize": 0.11854107677936554, + "eval_runtime": 533.5732, + "eval_samples_per_second": 10.851, + "eval_steps_per_second": 0.905, + "eval_wo_beta": 15.787796020507812, + "step": 250 + }, + { + "dpo_loss": 0.6893075704574585, + "epoch": 0.7227208313651393, + "grad_norm": 27.48861543576658, + "learning_rate": 9.405214343720706e-07, + "logits": -1.3376212120056152, + "logps": -81.39327239990234, + "loss": 0.1325, + "objective": 0.12804514169692993, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.42916667461395264, + "ranking_simple": 0.44583332538604736, + "regularize": 0.12804514169692993, + "step": 255, + "wo_beta": 14.828557968139648 + }, + { + "dpo_loss": 0.6919839978218079, + "epoch": 0.7368918280585735, + "grad_norm": 27.470977695013012, + "learning_rate": 9.365506381941065e-07, + "logits": -1.3046835660934448, + "logps": -83.32947540283203, + "loss": 0.1509, + "objective": 0.15500593185424805, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4583333432674408, + "regularize": 0.15500593185424805, + "step": 260, + "wo_beta": 15.419398307800293 + }, + { + "dpo_loss": 0.6987964510917664, + "epoch": 0.7510628247520076, + "grad_norm": 29.786537519342414, + "learning_rate": 9.32460493430591e-07, + "logits": -1.2736799716949463, + "logps": -82.46897888183594, + "loss": 0.1444, + "objective": 0.14515246450901031, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5708333253860474, + "regularize": 0.14515246450901031, + "step": 265, + "wo_beta": 15.908428192138672 + }, + { + "dpo_loss": 0.6944437026977539, + "epoch": 0.7652338214454416, + "grad_norm": 28.172549175339846, + "learning_rate": 9.282521182862629e-07, + "logits": -1.397876262664795, + "logps": -82.14982604980469, + "loss": 0.1491, + "objective": 0.15289539098739624, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.4541666805744171, + "regularize": 0.15289539098739624, + "step": 270, + "wo_beta": 14.118414878845215 + }, + { + "dpo_loss": 0.6878421902656555, + "epoch": 0.7794048181388757, + "grad_norm": 30.974249065309053, + "learning_rate": 9.239266632888658e-07, + "logits": -1.265884280204773, + "logps": -80.5745849609375, + "loss": 0.1429, + "objective": 0.13965575397014618, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 0.13965575397014618, + "step": 275, + "wo_beta": 15.147540092468262 + }, + { + "dpo_loss": 0.693124532699585, + "epoch": 0.7935758148323099, + "grad_norm": 27.26309671203667, + "learning_rate": 9.194853109746072e-07, + "logits": -1.317248821258545, + "logps": -80.71721649169922, + "loss": 0.1422, + "objective": 0.13741357624530792, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.13741357624530792, + "step": 280, + "wo_beta": 15.141572952270508 + }, + { + "dpo_loss": 0.6898981332778931, + "epoch": 0.807746811525744, + "grad_norm": 29.618387771117387, + "learning_rate": 9.14929275564863e-07, + "logits": -1.2990264892578125, + "logps": -81.34524536132812, + "loss": 0.1481, + "objective": 0.14202959835529327, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4749999940395355, + "regularize": 0.14202959835529327, + "step": 285, + "wo_beta": 16.715734481811523 + }, + { + "dpo_loss": 0.6989319920539856, + "epoch": 0.821917808219178, + "grad_norm": 30.35546225687188, + "learning_rate": 9.102598026342222e-07, + "logits": -1.310984492301941, + "logps": -80.47208404541016, + "loss": 0.1416, + "objective": 0.13658234477043152, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 0.13658234477043152, + "step": 290, + "wo_beta": 15.537436485290527 + }, + { + "dpo_loss": 0.692668080329895, + "epoch": 0.8360888049126122, + "grad_norm": 28.386489735858774, + "learning_rate": 9.0547816876996e-07, + "logits": -1.3056447505950928, + "logps": -80.58573913574219, + "loss": 0.1335, + "objective": 0.14200052618980408, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.48750001192092896, + "regularize": 0.14200052618980408, + "step": 295, + "wo_beta": 15.984179496765137 + }, + { + "dpo_loss": 0.6959550380706787, + "epoch": 0.8502598016060463, + "grad_norm": 30.90903589796416, + "learning_rate": 9.005856812230304e-07, + "logits": -1.2770187854766846, + "logps": -79.3738784790039, + "loss": 0.1305, + "objective": 0.12751255929470062, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5541666746139526, + "regularize": 0.12751255929470062, + "step": 300, + "wo_beta": 14.3499755859375 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.6960889101028442, + "eval_logits": -1.2862635850906372, + "eval_logps": -86.33123016357422, + "eval_loss": 0.12468627840280533, + "eval_objective": 0.1251634955406189, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5279502868652344, + "eval_regularize": 0.1251634955406189, + "eval_runtime": 492.3852, + "eval_samples_per_second": 11.759, + "eval_steps_per_second": 0.981, + "eval_wo_beta": 15.766751289367676, + "step": 300 + }, + { + "dpo_loss": 0.6915071606636047, + "epoch": 0.8644307982994804, + "grad_norm": 28.35320542673635, + "learning_rate": 8.955836775506775e-07, + "logits": -1.2531558275222778, + "logps": -80.3687744140625, + "loss": 0.1326, + "objective": 0.1348031610250473, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5249999761581421, + "regularize": 0.1348031610250473, + "step": 305, + "wo_beta": 15.622274398803711 + }, + { + "dpo_loss": 0.6971884965896606, + "epoch": 0.8786017949929145, + "grad_norm": 28.116582054859066, + "learning_rate": 8.904735252507609e-07, + "logits": -1.256584882736206, + "logps": -79.94914245605469, + "loss": 0.1365, + "objective": 0.1369226723909378, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.4958333373069763, + "regularize": 0.1369226723909378, + "step": 310, + "wo_beta": 14.816594123840332 + }, + { + "dpo_loss": 0.6855903267860413, + "epoch": 0.8927727916863486, + "grad_norm": 29.897768012112312, + "learning_rate": 8.852566213878946e-07, + "logits": -1.2702066898345947, + "logps": -79.8655014038086, + "loss": 0.1353, + "objective": 0.13145793974399567, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4749999940395355, + "regularize": 0.13145793974399567, + "step": 315, + "wo_beta": 15.161810874938965 + }, + { + "dpo_loss": 0.691845178604126, + "epoch": 0.9069437883797827, + "grad_norm": 28.736143424115674, + "learning_rate": 8.799343922115043e-07, + "logits": -1.2241441011428833, + "logps": -82.17134094238281, + "loss": 0.13, + "objective": 0.1402612328529358, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5541666746139526, + "regularize": 0.1402612328529358, + "step": 320, + "wo_beta": 15.099017143249512 + }, + { + "dpo_loss": 0.6962689161300659, + "epoch": 0.9211147850732169, + "grad_norm": 28.356303375759392, + "learning_rate": 8.745082927659046e-07, + "logits": -1.2910945415496826, + "logps": -83.30491638183594, + "loss": 0.1308, + "objective": 0.14350637793540955, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5166666507720947, + "regularize": 0.14350637793540955, + "step": 325, + "wo_beta": 15.133590698242188 + }, + { + "dpo_loss": 0.6975868344306946, + "epoch": 0.9352857817666509, + "grad_norm": 29.00689810312343, + "learning_rate": 8.689798064925048e-07, + "logits": -1.1349345445632935, + "logps": -82.04910278320312, + "loss": 0.1321, + "objective": 0.1296585500240326, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5291666388511658, + "regularize": 0.1296585500240326, + "step": 330, + "wo_beta": 16.1423282623291 + }, + { + "dpo_loss": 0.7005541920661926, + "epoch": 0.949456778460085, + "grad_norm": 32.3756572284601, + "learning_rate": 8.633504448242504e-07, + "logits": -1.149806261062622, + "logps": -81.64175415039062, + "loss": 0.1375, + "objective": 0.1390267014503479, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.1390267014503479, + "step": 335, + "wo_beta": 15.652006149291992 + }, + { + "dpo_loss": 0.6950960755348206, + "epoch": 0.9636277751535192, + "grad_norm": 27.122604040368284, + "learning_rate": 8.576217467724127e-07, + "logits": -1.2132624387741089, + "logps": -80.64006042480469, + "loss": 0.1292, + "objective": 0.12200692296028137, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5416666865348816, + "regularize": 0.12200692296028137, + "step": 340, + "wo_beta": 15.907822608947754 + }, + { + "dpo_loss": 0.6975562572479248, + "epoch": 0.9777987718469532, + "grad_norm": 27.134170349804087, + "learning_rate": 8.517952785058384e-07, + "logits": -1.2632955312728882, + "logps": -80.71128845214844, + "loss": 0.1283, + "objective": 0.11938898265361786, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.5708333253860474, + "regularize": 0.11938898265361786, + "step": 345, + "wo_beta": 14.762292861938477 + }, + { + "dpo_loss": 0.6852299571037292, + "epoch": 0.9919697685403873, + "grad_norm": 27.658996359022336, + "learning_rate": 8.458726329227747e-07, + "logits": -1.1914026737213135, + "logps": -81.73149108886719, + "loss": 0.1407, + "objective": 0.1554519683122635, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.1554519683122635, + "step": 350, + "wo_beta": 15.107101440429688 + }, + { + "epoch": 0.9919697685403873, + "eval_dpo_loss": 0.6975587606430054, + "eval_logits": -1.2756990194320679, + "eval_logps": -86.45014190673828, + "eval_loss": 0.13138790428638458, + "eval_objective": 0.13096390664577484, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5222567319869995, + "eval_regularize": 0.13096390664577484, + "eval_runtime": 498.5276, + "eval_samples_per_second": 11.614, + "eval_steps_per_second": 0.969, + "eval_wo_beta": 15.656978607177734, + "step": 350 + }, + { + "dpo_loss": 0.6982021331787109, + "epoch": 1.0061407652338215, + "grad_norm": 28.652193663332632, + "learning_rate": 8.398554292153865e-07, + "logits": -1.3350815773010254, + "logps": -79.34367370605469, + "loss": 0.1274, + "objective": 0.1257932186126709, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5333333611488342, + "regularize": 0.1257932186126709, + "step": 355, + "wo_beta": 16.378000259399414 + }, + { + "dpo_loss": 0.6944258809089661, + "epoch": 1.0203117619272555, + "grad_norm": 27.709591206743504, + "learning_rate": 8.337453124270862e-07, + "logits": -1.2474267482757568, + "logps": -80.31254577636719, + "loss": 0.1453, + "objective": 0.14443162083625793, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.4958333373069763, + "regularize": 0.14443162083625793, + "step": 360, + "wo_beta": 16.190935134887695 + }, + { + "dpo_loss": 0.6928178071975708, + "epoch": 1.0344827586206897, + "grad_norm": 31.070681767199403, + "learning_rate": 8.275439530027947e-07, + "logits": -1.276475191116333, + "logps": -80.50602722167969, + "loss": 0.1371, + "objective": 0.13979977369308472, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5166666507720947, + "regularize": 0.13979977369308472, + "step": 365, + "wo_beta": 14.378859519958496 + }, + { + "dpo_loss": 0.699609637260437, + "epoch": 1.0486537553141237, + "grad_norm": 30.003574042191506, + "learning_rate": 8.212530463322582e-07, + "logits": -1.2496185302734375, + "logps": -79.11912536621094, + "loss": 0.1306, + "objective": 0.1423943042755127, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.512499988079071, + "regularize": 0.1423943042755127, + "step": 370, + "wo_beta": 15.124627113342285 + }, + { + "dpo_loss": 0.6967942714691162, + "epoch": 1.0628247520075578, + "grad_norm": 27.953352635424668, + "learning_rate": 8.148743122865463e-07, + "logits": -1.3011940717697144, + "logps": -80.02760314941406, + "loss": 0.1297, + "objective": 0.11541719734668732, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5249999761581421, + "regularize": 0.11541719734668732, + "step": 375, + "wo_beta": 15.568713188171387 + }, + { + "dpo_loss": 0.6877638697624207, + "epoch": 1.076995748700992, + "grad_norm": 27.32675287386393, + "learning_rate": 8.084094947478554e-07, + "logits": -1.2718795537948608, + "logps": -81.57784271240234, + "loss": 0.1306, + "objective": 0.1403437703847885, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.1403437703847885, + "step": 380, + "wo_beta": 15.024064064025879 + }, + { + "dpo_loss": 0.7029018402099609, + "epoch": 1.091166745394426, + "grad_norm": 25.199092121516863, + "learning_rate": 8.018603611327504e-07, + "logits": -1.2051031589508057, + "logps": -80.49242401123047, + "loss": 0.1289, + "objective": 0.12692388892173767, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5375000238418579, + "regularize": 0.12692388892173767, + "step": 385, + "wo_beta": 15.658522605895996 + }, + { + "dpo_loss": 0.6920034885406494, + "epoch": 1.10533774208786, + "grad_norm": 28.52425339340298, + "learning_rate": 7.952287019089685e-07, + "logits": -1.1542584896087646, + "logps": -82.2014389038086, + "loss": 0.128, + "objective": 0.13050222396850586, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.5666666626930237, + "regularize": 0.13050222396850586, + "step": 390, + "wo_beta": 15.990551948547363 + }, + { + "dpo_loss": 0.6983939409255981, + "epoch": 1.1195087387812943, + "grad_norm": 28.676328293583875, + "learning_rate": 7.88516330105925e-07, + "logits": -1.21431303024292, + "logps": -81.3152847290039, + "loss": 0.1271, + "objective": 0.12024066597223282, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 0.12024066597223282, + "step": 395, + "wo_beta": 14.856566429138184 + }, + { + "dpo_loss": 0.7045825719833374, + "epoch": 1.1336797354747283, + "grad_norm": 26.361954924055155, + "learning_rate": 7.817250808190483e-07, + "logits": -1.2783249616622925, + "logps": -79.67323303222656, + "loss": 0.1245, + "objective": 0.12074790149927139, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5416666865348816, + "regularize": 0.12074790149927139, + "step": 400, + "wo_beta": 15.344539642333984 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.698018491268158, + "eval_logits": -1.2417831420898438, + "eval_logps": -86.2849349975586, + "eval_loss": 0.13988268375396729, + "eval_objective": 0.13904725015163422, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5258799195289612, + "eval_regularize": 0.13904725015163422, + "eval_runtime": 544.4211, + "eval_samples_per_second": 10.635, + "eval_steps_per_second": 0.887, + "eval_wo_beta": 15.614696502685547, + "step": 400 + }, + { + "dpo_loss": 0.6944829225540161, + "epoch": 1.1478507321681626, + "grad_norm": 26.829396266860115, + "learning_rate": 7.74856810708083e-07, + "logits": -1.2358256578445435, + "logps": -80.91136169433594, + "loss": 0.1252, + "objective": 0.13733495771884918, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.46666666865348816, + "regularize": 0.13733495771884918, + "step": 405, + "wo_beta": 16.799846649169922 + }, + { + "dpo_loss": 0.6951694488525391, + "epoch": 1.1620217288615966, + "grad_norm": 25.84880624163644, + "learning_rate": 7.679133974894982e-07, + "logits": -1.2413955926895142, + "logps": -80.84453582763672, + "loss": 0.1146, + "objective": 0.10967493802309036, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.512499988079071, + "regularize": 0.10967493802309036, + "step": 410, + "wo_beta": 16.299657821655273 + }, + { + "dpo_loss": 0.6816955804824829, + "epoch": 1.1761927255550306, + "grad_norm": 28.539266676030703, + "learning_rate": 7.608967394231386e-07, + "logits": -1.1460075378417969, + "logps": -80.07962799072266, + "loss": 0.1201, + "objective": 0.11568634957075119, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.512499988079071, + "regularize": 0.11568634957075119, + "step": 415, + "wo_beta": 15.849366188049316 + }, + { + "dpo_loss": 0.6958954334259033, + "epoch": 1.1903637222484649, + "grad_norm": 26.83226072322417, + "learning_rate": 7.538087547932584e-07, + "logits": -1.1252403259277344, + "logps": -80.94552612304688, + "loss": 0.1212, + "objective": 0.11827482283115387, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5083333253860474, + "regularize": 0.11827482283115387, + "step": 420, + "wo_beta": 16.14940643310547 + }, + { + "dpo_loss": 0.6887015700340271, + "epoch": 1.204534718941899, + "grad_norm": 26.51780573149761, + "learning_rate": 7.466513813840824e-07, + "logits": -1.1933962106704712, + "logps": -78.89797973632812, + "loss": 0.1135, + "objective": 0.1143736019730568, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5166666507720947, + "regularize": 0.1143736019730568, + "step": 425, + "wo_beta": 14.578470230102539 + }, + { + "dpo_loss": 0.6991615891456604, + "epoch": 1.2187057156353331, + "grad_norm": 27.122286588814305, + "learning_rate": 7.394265759500347e-07, + "logits": -1.1930339336395264, + "logps": -80.126220703125, + "loss": 0.1127, + "objective": 0.11676573753356934, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4749999940395355, + "regularize": 0.11676573753356934, + "step": 430, + "wo_beta": 15.9819974899292 + }, + { + "dpo_loss": 0.6940677762031555, + "epoch": 1.2328767123287672, + "grad_norm": 26.542064973728884, + "learning_rate": 7.321363136807818e-07, + "logits": -1.1478148698806763, + "logps": -80.018310546875, + "loss": 0.1273, + "objective": 0.12024448066949844, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.6000000238418579, + "regularize": 0.12024448066949844, + "step": 435, + "wo_beta": 17.044919967651367 + }, + { + "dpo_loss": 0.6969379186630249, + "epoch": 1.2470477090222012, + "grad_norm": 28.251093862423456, + "learning_rate": 7.247825876612352e-07, + "logits": -1.1687721014022827, + "logps": -79.19255828857422, + "loss": 0.1253, + "objective": 0.12027813494205475, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.550000011920929, + "regularize": 0.12027813494205475, + "step": 440, + "wo_beta": 14.654241561889648 + }, + { + "dpo_loss": 0.6925280094146729, + "epoch": 1.2612187057156352, + "grad_norm": 26.524957115429544, + "learning_rate": 7.173674083266623e-07, + "logits": -1.1623238325119019, + "logps": -80.57234191894531, + "loss": 0.1123, + "objective": 0.11110406368970871, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5416666865348816, + "regularize": 0.11110406368970871, + "step": 445, + "wo_beta": 16.533472061157227 + }, + { + "dpo_loss": 0.6959200501441956, + "epoch": 1.2753897024090695, + "grad_norm": 25.817109114436615, + "learning_rate": 7.098928029130528e-07, + "logits": -1.2953335046768188, + "logps": -80.17058563232422, + "loss": 0.1163, + "objective": 0.11630918085575104, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.11630918085575104, + "step": 450, + "wo_beta": 14.598237991333008 + }, + { + "epoch": 1.2753897024090695, + "eval_dpo_loss": 0.6984797716140747, + "eval_logits": -1.2306897640228271, + "eval_logps": -85.48281860351562, + "eval_loss": 0.14205217361450195, + "eval_objective": 0.14207439124584198, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5274327397346497, + "eval_regularize": 0.14207439124584198, + "eval_runtime": 502.3414, + "eval_samples_per_second": 11.526, + "eval_steps_per_second": 0.961, + "eval_wo_beta": 15.61281681060791, + "step": 450 + }, + { + "dpo_loss": 0.6977149248123169, + "epoch": 1.2895606991025035, + "grad_norm": 29.01794849451687, + "learning_rate": 7.023608149028936e-07, + "logits": -1.1321525573730469, + "logps": -79.79704284667969, + "loss": 0.1102, + "objective": 0.10798730701208115, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.4958333373069763, + "regularize": 0.10798730701208115, + "step": 455, + "wo_beta": 14.988642692565918 + }, + { + "dpo_loss": 0.6960881352424622, + "epoch": 1.3037316957959377, + "grad_norm": 25.82316278857825, + "learning_rate": 6.947735034665001e-07, + "logits": -1.2272473573684692, + "logps": -79.4093246459961, + "loss": 0.1071, + "objective": 0.10132616013288498, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.48750001192092896, + "regularize": 0.10132616013288498, + "step": 460, + "wo_beta": 15.888258934020996 + }, + { + "dpo_loss": 0.6906372308731079, + "epoch": 1.3179026924893718, + "grad_norm": 30.635018246102483, + "learning_rate": 6.871329428990601e-07, + "logits": -1.2102056741714478, + "logps": -78.2228775024414, + "loss": 0.1131, + "objective": 0.11604170501232147, + "ranking_idealized": 0.5541666746139526, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.46666666865348816, + "regularize": 0.11604170501232147, + "step": 465, + "wo_beta": 14.311129570007324 + }, + { + "dpo_loss": 0.700882077217102, + "epoch": 1.3320736891828058, + "grad_norm": 27.46778566417897, + "learning_rate": 6.794412220535425e-07, + "logits": -1.2833130359649658, + "logps": -77.55262756347656, + "loss": 0.108, + "objective": 0.10955775529146194, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5083333253860474, + "regularize": 0.10955775529146194, + "step": 470, + "wo_beta": 14.30273151397705 + }, + { + "dpo_loss": 0.6947088241577148, + "epoch": 1.34624468587624, + "grad_norm": 27.567991845029866, + "learning_rate": 6.717004437696249e-07, + "logits": -1.1878196001052856, + "logps": -79.7737808227539, + "loss": 0.1143, + "objective": 0.10682200640439987, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 0.10682200640439987, + "step": 475, + "wo_beta": 16.000301361083984 + }, + { + "dpo_loss": 0.7012575268745422, + "epoch": 1.360415682569674, + "grad_norm": 26.96292751307233, + "learning_rate": 6.639127242987987e-07, + "logits": -1.2194726467132568, + "logps": -79.7364730834961, + "loss": 0.1121, + "objective": 0.10879840701818466, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4958333373069763, + "regularize": 0.10879840701818466, + "step": 480, + "wo_beta": 17.723169326782227 + }, + { + "dpo_loss": 0.6902076005935669, + "epoch": 1.3745866792631083, + "grad_norm": 25.536217139623062, + "learning_rate": 6.560801927258079e-07, + "logits": -1.2140812873840332, + "logps": -77.77493286132812, + "loss": 0.1063, + "objective": 0.10283537954092026, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.4958333373069763, + "regularize": 0.10283537954092026, + "step": 485, + "wo_beta": 16.162378311157227 + }, + { + "dpo_loss": 0.7003743648529053, + "epoch": 1.3887576759565423, + "grad_norm": 25.340128312194377, + "learning_rate": 6.482049903865768e-07, + "logits": -1.1755324602127075, + "logps": -80.6698226928711, + "loss": 0.1065, + "objective": 0.11661101877689362, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5375000238418579, + "regularize": 0.11661101877689362, + "step": 490, + "wo_beta": 15.291964530944824 + }, + { + "dpo_loss": 0.6933376789093018, + "epoch": 1.4029286726499763, + "grad_norm": 27.42214588210337, + "learning_rate": 6.402892702827916e-07, + "logits": -1.203405499458313, + "logps": -81.71482849121094, + "loss": 0.1083, + "objective": 0.11117922514677048, + "ranking_idealized": 0.5583333373069763, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4833333194255829, + "regularize": 0.11117922514677048, + "step": 495, + "wo_beta": 15.19780445098877 + }, + { + "dpo_loss": 0.6919417977333069, + "epoch": 1.4170996693434104, + "grad_norm": 25.59099749967404, + "learning_rate": 6.323351964932908e-07, + "logits": -1.1464035511016846, + "logps": -80.67649841308594, + "loss": 0.1071, + "objective": 0.10751333087682724, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.49166667461395264, + "regularize": 0.10751333087682724, + "step": 500, + "wo_beta": 14.786382675170898 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6979657411575317, + "eval_logits": -1.2270138263702393, + "eval_logps": -87.26725006103516, + "eval_loss": 0.13817694783210754, + "eval_objective": 0.1376110315322876, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5284678936004639, + "eval_regularize": 0.1376110315322876, + "eval_runtime": 507.9058, + "eval_samples_per_second": 11.4, + "eval_steps_per_second": 0.951, + "eval_wo_beta": 15.64445686340332, + "step": 500 + }, + { + "dpo_loss": 0.6891559362411499, + "epoch": 1.4312706660368446, + "grad_norm": 24.27122577359571, + "learning_rate": 6.243449435824276e-07, + "logits": -1.2177590131759644, + "logps": -81.35147094726562, + "loss": 0.1101, + "objective": 0.1094871535897255, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 0.1094871535897255, + "step": 505, + "wo_beta": 15.79046630859375 + }, + { + "dpo_loss": 0.6941244602203369, + "epoch": 1.4454416627302786, + "grad_norm": 25.930769694740054, + "learning_rate": 6.163206960055652e-07, + "logits": -1.251134991645813, + "logps": -83.10639953613281, + "loss": 0.1006, + "objective": 0.09994279593229294, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5291666388511658, + "regularize": 0.09994279593229294, + "step": 510, + "wo_beta": 14.899516105651855 + }, + { + "dpo_loss": 0.6874905824661255, + "epoch": 1.4596126594237129, + "grad_norm": 26.602314880639124, + "learning_rate": 6.082646475118699e-07, + "logits": -1.2633229494094849, + "logps": -84.02688598632812, + "loss": 0.106, + "objective": 0.10199037194252014, + "ranking_idealized": 0.550000011920929, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4541666805744171, + "regularize": 0.10199037194252014, + "step": 515, + "wo_beta": 15.941681861877441 + }, + { + "dpo_loss": 0.6967552900314331, + "epoch": 1.473783656117147, + "grad_norm": 29.852612268822412, + "learning_rate": 6.001790005445606e-07, + "logits": -1.184912919998169, + "logps": -80.95891571044922, + "loss": 0.1071, + "objective": 0.10300089418888092, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.49166667461395264, + "regularize": 0.10300089418888092, + "step": 520, + "wo_beta": 15.731270790100098 + }, + { + "dpo_loss": 0.6896428465843201, + "epoch": 1.487954652810581, + "grad_norm": 25.111572790175902, + "learning_rate": 5.920659656387836e-07, + "logits": -1.0910202264785767, + "logps": -79.46784973144531, + "loss": 0.1087, + "objective": 0.10289794951677322, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 0.10289794951677322, + "step": 525, + "wo_beta": 14.915215492248535 + }, + { + "dpo_loss": 0.6884135603904724, + "epoch": 1.5021256495040152, + "grad_norm": 29.155836377588727, + "learning_rate": 5.839277608172738e-07, + "logits": -1.2429722547531128, + "logps": -82.09452056884766, + "loss": 0.1052, + "objective": 0.11081438511610031, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5083333253860474, + "regularize": 0.11081438511610031, + "step": 530, + "wo_beta": 14.850537300109863 + }, + { + "dpo_loss": 0.6961663961410522, + "epoch": 1.5162966461974492, + "grad_norm": 25.320925581209725, + "learning_rate": 5.757666109839702e-07, + "logits": -1.2323859930038452, + "logps": -80.30747985839844, + "loss": 0.1001, + "objective": 0.09293892234563828, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 0.09293892234563828, + "step": 535, + "wo_beta": 15.262944221496582 + }, + { + "dpo_loss": 0.6923481225967407, + "epoch": 1.5304676428908834, + "grad_norm": 32.01848958383342, + "learning_rate": 5.675847473157485e-07, + "logits": -1.1209362745285034, + "logps": -80.81604766845703, + "loss": 0.1017, + "objective": 0.1114068478345871, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.1114068478345871, + "step": 540, + "wo_beta": 14.910977363586426 + }, + { + "dpo_loss": 0.6916370987892151, + "epoch": 1.5446386395843175, + "grad_norm": 25.57319909143034, + "learning_rate": 5.5938440665244e-07, + "logits": -1.2216829061508179, + "logps": -81.30005645751953, + "loss": 0.1016, + "objective": 0.09744974970817566, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.5791666507720947, + "regularize": 0.09744974970817566, + "step": 545, + "wo_beta": 14.310770988464355 + }, + { + "dpo_loss": 0.6908753514289856, + "epoch": 1.5588096362777515, + "grad_norm": 25.722462769354692, + "learning_rate": 5.511678308853025e-07, + "logits": -1.2278273105621338, + "logps": -81.18257141113281, + "loss": 0.1045, + "objective": 0.11294317990541458, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.44583332538604736, + "regularize": 0.11294317990541458, + "step": 550, + "wo_beta": 14.909473419189453 + }, + { + "epoch": 1.5588096362777515, + "eval_dpo_loss": 0.6977279186248779, + "eval_logits": -1.2327359914779663, + "eval_logps": -87.07755279541016, + "eval_loss": 0.1427639275789261, + "eval_objective": 0.14261718094348907, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5253623127937317, + "eval_regularize": 0.14261718094348907, + "eval_runtime": 530.6929, + "eval_samples_per_second": 10.91, + "eval_steps_per_second": 0.91, + "eval_wo_beta": 15.58066177368164, + "step": 550 + }, + { + "dpo_loss": 0.6979414820671082, + "epoch": 1.5729806329711855, + "grad_norm": 25.97117042381748, + "learning_rate": 5.429372663441085e-07, + "logits": -1.0773119926452637, + "logps": -80.85298919677734, + "loss": 0.0969, + "objective": 0.10372842103242874, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5083333253860474, + "regularize": 0.10372842103242874, + "step": 555, + "wo_beta": 14.278889656066895 + }, + { + "dpo_loss": 0.6877902746200562, + "epoch": 1.5871516296646198, + "grad_norm": 26.089498554586406, + "learning_rate": 5.34694963183022e-07, + "logits": -1.149969220161438, + "logps": -80.23606872558594, + "loss": 0.0937, + "objective": 0.0943736732006073, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.0943736732006073, + "step": 560, + "wo_beta": 15.772320747375488 + }, + { + "dpo_loss": 0.6947767734527588, + "epoch": 1.601322626358054, + "grad_norm": 26.116747650931945, + "learning_rate": 5.264431747654283e-07, + "logits": -1.1340062618255615, + "logps": -81.63863372802734, + "loss": 0.0947, + "objective": 0.10096503049135208, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.4416666626930237, + "regularize": 0.10096503049135208, + "step": 565, + "wo_beta": 14.981669425964355 + }, + { + "dpo_loss": 0.692035436630249, + "epoch": 1.615493623051488, + "grad_norm": 30.63214010200871, + "learning_rate": 5.181841570478872e-07, + "logits": -1.2694156169891357, + "logps": -81.64689636230469, + "loss": 0.0959, + "objective": 0.1027316302061081, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5375000238418579, + "regularize": 0.1027316302061081, + "step": 570, + "wo_beta": 15.00640869140625 + }, + { + "dpo_loss": 0.6928724646568298, + "epoch": 1.629664619744922, + "grad_norm": 26.684109688489027, + "learning_rate": 5.099201679633768e-07, + "logits": -1.219287633895874, + "logps": -79.6671371459961, + "loss": 0.0902, + "objective": 0.08943381905555725, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5458333492279053, + "regularize": 0.08943381905555725, + "step": 575, + "wo_beta": 15.883743286132812 + }, + { + "dpo_loss": 0.7014293670654297, + "epoch": 1.643835616438356, + "grad_norm": 23.659415037737205, + "learning_rate": 5.016534668039976e-07, + "logits": -1.245025396347046, + "logps": -79.65864562988281, + "loss": 0.0922, + "objective": 0.09364978969097137, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5375000238418579, + "regularize": 0.09364978969097137, + "step": 580, + "wo_beta": 14.111478805541992 + }, + { + "dpo_loss": 0.6919021606445312, + "epoch": 1.6580066131317903, + "grad_norm": 26.81667336982406, + "learning_rate": 4.933863136033039e-07, + "logits": -1.1303011178970337, + "logps": -79.01573944091797, + "loss": 0.0912, + "objective": 0.09164983779191971, + "ranking_idealized": 0.5333333611488342, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.4749999940395355, + "regularize": 0.09164983779191971, + "step": 585, + "wo_beta": 15.943554878234863 + }, + { + "dpo_loss": 0.6958838701248169, + "epoch": 1.6721776098252243, + "grad_norm": 24.98087975104312, + "learning_rate": 4.851209685184338e-07, + "logits": -1.1811211109161377, + "logps": -78.23771667480469, + "loss": 0.0896, + "objective": 0.08815690129995346, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.49166667461395264, + "regularize": 0.08815690129995346, + "step": 590, + "wo_beta": 13.053691864013672 + }, + { + "dpo_loss": 0.7018415927886963, + "epoch": 1.6863486065186586, + "grad_norm": 28.222712089048155, + "learning_rate": 4.768596912122045e-07, + "logits": -1.1410056352615356, + "logps": -78.93828582763672, + "loss": 0.0867, + "objective": 0.08855770528316498, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5208333134651184, + "regularize": 0.08855770528316498, + "step": 595, + "wo_beta": 16.56429672241211 + }, + { + "dpo_loss": 0.6934791803359985, + "epoch": 1.7005196032120926, + "grad_norm": 26.368906194308657, + "learning_rate": 4.686047402353433e-07, + "logits": -1.1907525062561035, + "logps": -80.13634490966797, + "loss": 0.0866, + "objective": 0.09509587287902832, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5375000238418579, + "regularize": 0.09509587287902832, + "step": 600, + "wo_beta": 15.167766571044922 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6965176463127136, + "eval_logits": -1.2196165323257446, + "eval_logps": -85.19258880615234, + "eval_loss": 0.14236733317375183, + "eval_objective": 0.14079627394676208, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5269151329994202, + "eval_regularize": 0.14079627394676208, + "eval_runtime": 531.3996, + "eval_samples_per_second": 10.896, + "eval_steps_per_second": 0.909, + "eval_wo_beta": 15.660321235656738, + "step": 600 + }, + { + "dpo_loss": 0.6949159502983093, + "epoch": 1.7146905999055266, + "grad_norm": 27.65546942935795, + "learning_rate": 4.60358372409022e-07, + "logits": -1.135356068611145, + "logps": -80.58204650878906, + "loss": 0.0851, + "objective": 0.10239014774560928, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5166666507720947, + "regularize": 0.10239014774560928, + "step": 605, + "wo_beta": 17.19474220275879 + }, + { + "dpo_loss": 0.6975926160812378, + "epoch": 1.7288615965989607, + "grad_norm": 27.123580050770954, + "learning_rate": 4.521228422078649e-07, + "logits": -1.2206453084945679, + "logps": -78.68167877197266, + "loss": 0.0882, + "objective": 0.0891619473695755, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.5791666507720947, + "regularize": 0.0891619473695755, + "step": 610, + "wo_beta": 15.383539199829102 + }, + { + "dpo_loss": 0.6973095536231995, + "epoch": 1.743032593292395, + "grad_norm": 27.24577954816879, + "learning_rate": 4.439004011435979e-07, + "logits": -1.2362395524978638, + "logps": -79.0839614868164, + "loss": 0.0875, + "objective": 0.08598390221595764, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5166666507720947, + "regularize": 0.08598390221595764, + "step": 615, + "wo_beta": 15.571494102478027 + }, + { + "dpo_loss": 0.6836999654769897, + "epoch": 1.7572035899858292, + "grad_norm": 26.787162425144906, + "learning_rate": 4.3569329714950703e-07, + "logits": -1.2427488565444946, + "logps": -79.54029846191406, + "loss": 0.0838, + "objective": 0.08879180997610092, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.08879180997610092, + "step": 620, + "wo_beta": 15.718174934387207 + }, + { + "dpo_loss": 0.6966572999954224, + "epoch": 1.7713745866792632, + "grad_norm": 27.034118419678652, + "learning_rate": 4.275037739658771e-07, + "logits": -1.1582579612731934, + "logps": -78.85964965820312, + "loss": 0.0817, + "objective": 0.08299548178911209, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.08299548178911209, + "step": 625, + "wo_beta": 14.923952102661133 + }, + { + "dpo_loss": 0.688913881778717, + "epoch": 1.7855455833726972, + "grad_norm": 25.951621085094303, + "learning_rate": 4.193340705265745e-07, + "logits": -1.1893038749694824, + "logps": -80.92503356933594, + "loss": 0.0785, + "objective": 0.08198042213916779, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.48750001192092896, + "regularize": 0.08198042213916779, + "step": 630, + "wo_beta": 15.90516185760498 + }, + { + "dpo_loss": 0.6924195885658264, + "epoch": 1.7997165800661312, + "grad_norm": 26.232506833263244, + "learning_rate": 4.1118642034694565e-07, + "logits": -1.2785860300064087, + "logps": -79.61809539794922, + "loss": 0.0829, + "objective": 0.08000766485929489, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5416666865348816, + "regularize": 0.08000766485929489, + "step": 635, + "wo_beta": 15.796289443969727 + }, + { + "dpo_loss": 0.6959947943687439, + "epoch": 1.8138875767595655, + "grad_norm": 27.993983855367574, + "learning_rate": 4.030630509131959e-07, + "logits": -1.2194859981536865, + "logps": -80.71635437011719, + "loss": 0.0842, + "objective": 0.089814692735672, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5, + "regularize": 0.089814692735672, + "step": 640, + "wo_beta": 16.932401657104492 + }, + { + "dpo_loss": 0.6927257776260376, + "epoch": 1.8280585734529995, + "grad_norm": 28.107934645205802, + "learning_rate": 3.9496618307341713e-07, + "logits": -1.256467342376709, + "logps": -81.03665161132812, + "loss": 0.0853, + "objective": 0.0889531597495079, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5541666746139526, + "regularize": 0.0889531597495079, + "step": 645, + "wo_beta": 15.800675392150879 + }, + { + "dpo_loss": 0.6969668865203857, + "epoch": 1.8422295701464337, + "grad_norm": 27.015977070193543, + "learning_rate": 3.8689803043042996e-07, + "logits": -1.2903110980987549, + "logps": -80.92781829833984, + "loss": 0.0847, + "objective": 0.0801667794585228, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5249999761581421, + "regularize": 0.0801667794585228, + "step": 650, + "wo_beta": 15.173321723937988 + }, + { + "epoch": 1.8422295701464337, + "eval_dpo_loss": 0.6974130868911743, + "eval_logits": -1.2229208946228027, + "eval_logps": -86.1129150390625, + "eval_loss": 0.1379525512456894, + "eval_objective": 0.13563887774944305, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.13563887774944305, + "eval_runtime": 538.8083, + "eval_samples_per_second": 10.746, + "eval_steps_per_second": 0.896, + "eval_wo_beta": 15.666037559509277, + "step": 650 + }, + { + "dpo_loss": 0.6896820068359375, + "epoch": 1.8564005668398678, + "grad_norm": 26.639855046988597, + "learning_rate": 3.788607987366069e-07, + "logits": -1.1662521362304688, + "logps": -78.9451675415039, + "loss": 0.081, + "objective": 0.08504978567361832, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.4958333373069763, + "regularize": 0.08504978567361832, + "step": 655, + "wo_beta": 15.233590126037598 + }, + { + "dpo_loss": 0.6891672611236572, + "epoch": 1.8705715635333018, + "grad_norm": 25.995274477757608, + "learning_rate": 3.708566852908418e-07, + "logits": -1.2193191051483154, + "logps": -81.20162200927734, + "loss": 0.0781, + "objective": 0.08211526274681091, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5291666388511658, + "regularize": 0.08211526274681091, + "step": 660, + "wo_beta": 16.372514724731445 + }, + { + "dpo_loss": 0.6908305287361145, + "epoch": 1.8847425602267358, + "grad_norm": 28.66146531985666, + "learning_rate": 3.6288787833783016e-07, + "logits": -1.2218626737594604, + "logps": -80.04493713378906, + "loss": 0.0815, + "objective": 0.08463230729103088, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5416666865348816, + "regularize": 0.08463230729103088, + "step": 665, + "wo_beta": 15.034836769104004 + }, + { + "dpo_loss": 0.6863933801651001, + "epoch": 1.89891355692017, + "grad_norm": 30.111613598581105, + "learning_rate": 3.5495655646982503e-07, + "logits": -1.1576950550079346, + "logps": -78.58309173583984, + "loss": 0.0755, + "objective": 0.07363765686750412, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.47083333134651184, + "regularize": 0.07363765686750412, + "step": 670, + "wo_beta": 16.09279441833496 + }, + { + "dpo_loss": 0.695208728313446, + "epoch": 1.9130845536136043, + "grad_norm": 27.241452477717303, + "learning_rate": 3.470648880310313e-07, + "logits": -1.1648114919662476, + "logps": -79.5347671508789, + "loss": 0.0735, + "objective": 0.07240771502256393, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5249999761581421, + "regularize": 0.07240771502256393, + "step": 675, + "wo_beta": 15.440892219543457 + }, + { + "dpo_loss": 0.6950518488883972, + "epoch": 1.9272555503070383, + "grad_norm": 25.198332305215366, + "learning_rate": 3.3921503052480236e-07, + "logits": -1.2177760601043701, + "logps": -81.27088165283203, + "loss": 0.0778, + "objective": 0.07866664230823517, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5083333253860474, + "regularize": 0.07866664230823517, + "step": 680, + "wo_beta": 14.848203659057617 + }, + { + "dpo_loss": 0.6911803483963013, + "epoch": 1.9414265470004723, + "grad_norm": 26.10726119743999, + "learning_rate": 3.314091300237999e-07, + "logits": -1.1625895500183105, + "logps": -78.15774536132812, + "loss": 0.0738, + "objective": 0.07330299913883209, + "ranking_idealized": 0.5291666388511658, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4749999940395355, + "regularize": 0.07330299913883209, + "step": 685, + "wo_beta": 15.467205047607422 + }, + { + "dpo_loss": 0.6860196590423584, + "epoch": 1.9555975436939064, + "grad_norm": 26.093926175967837, + "learning_rate": 3.236493205832794e-07, + "logits": -1.21792733669281, + "logps": -79.12659454345703, + "loss": 0.071, + "objective": 0.07433832436800003, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5458333492279053, + "regularize": 0.07433832436800003, + "step": 690, + "wo_beta": 15.627902030944824 + }, + { + "dpo_loss": 0.6922653317451477, + "epoch": 1.9697685403873406, + "grad_norm": 26.38661425001647, + "learning_rate": 3.15937723657661e-07, + "logits": -1.1168206930160522, + "logps": -79.83128356933594, + "loss": 0.0723, + "objective": 0.06720028072595596, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.49166667461395264, + "regularize": 0.06720028072595596, + "step": 695, + "wo_beta": 16.023746490478516 + }, + { + "dpo_loss": 0.6856616139411926, + "epoch": 1.9839395370807746, + "grad_norm": 27.380948849082866, + "learning_rate": 3.082764475205442e-07, + "logits": -1.103851079940796, + "logps": -80.37809753417969, + "loss": 0.071, + "objective": 0.0717112347483635, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5166666507720947, + "regularize": 0.0717112347483635, + "step": 700, + "wo_beta": 14.657614707946777 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6979688405990601, + "eval_logits": -1.220837116241455, + "eval_logps": -85.24955749511719, + "eval_loss": 0.1420368105173111, + "eval_objective": 0.14046597480773926, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5253623127937317, + "eval_regularize": 0.14046597480773926, + "eval_runtime": 531.1966, + "eval_samples_per_second": 10.9, + "eval_steps_per_second": 0.909, + "eval_wo_beta": 15.610904693603516, + "step": 700 + }, + { + "dpo_loss": 0.6904897093772888, + "epoch": 1.9981105337742089, + "grad_norm": 27.03253447324609, + "learning_rate": 3.006675866883275e-07, + "logits": -1.0365864038467407, + "logps": -79.36177062988281, + "loss": 0.0704, + "objective": 0.07408583164215088, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 0.07408583164215088, + "step": 705, + "wo_beta": 16.758014678955078 + }, + { + "dpo_loss": 0.6849521398544312, + "epoch": 2.012281530467643, + "grad_norm": 24.956147004394822, + "learning_rate": 2.931132213475884e-07, + "logits": -1.1888701915740967, + "logps": -78.96455383300781, + "loss": 0.0619, + "objective": 0.06422288715839386, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.06422288715839386, + "step": 710, + "wo_beta": 15.760772705078125 + }, + { + "dpo_loss": 0.6961538791656494, + "epoch": 2.026452527161077, + "grad_norm": 27.279846270487834, + "learning_rate": 2.856154167863814e-07, + "logits": -1.1860238313674927, + "logps": -78.40641021728516, + "loss": 0.0631, + "objective": 0.06441039592027664, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.550000011920929, + "regularize": 0.06441039592027664, + "step": 715, + "wo_beta": 14.784539222717285 + }, + { + "dpo_loss": 0.6898289322853088, + "epoch": 2.040623523854511, + "grad_norm": 28.14233189102926, + "learning_rate": 2.7817622282960813e-07, + "logits": -1.1884685754776, + "logps": -79.12120819091797, + "loss": 0.0633, + "objective": 0.06231885775923729, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.06231885775923729, + "step": 720, + "wo_beta": 15.765007972717285 + }, + { + "dpo_loss": 0.6928841471672058, + "epoch": 2.0547945205479454, + "grad_norm": 25.78908501428665, + "learning_rate": 2.707976732786166e-07, + "logits": -1.1958059072494507, + "logps": -81.6028060913086, + "loss": 0.0578, + "objective": 0.06330116838216782, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.06330116838216782, + "step": 725, + "wo_beta": 13.992663383483887 + }, + { + "dpo_loss": 0.6905084252357483, + "epoch": 2.0689655172413794, + "grad_norm": 26.140030636203093, + "learning_rate": 2.6348178535517965e-07, + "logits": -1.2607707977294922, + "logps": -79.21609497070312, + "loss": 0.0598, + "objective": 0.05353347584605217, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.4958333373069763, + "regularize": 0.05353347584605217, + "step": 730, + "wo_beta": 15.095206260681152 + }, + { + "dpo_loss": 0.6901918053627014, + "epoch": 2.0831365139348135, + "grad_norm": 27.302640565922513, + "learning_rate": 2.5623055915000686e-07, + "logits": -1.1885894536972046, + "logps": -78.86723327636719, + "loss": 0.0579, + "objective": 0.05939151346683502, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4833333194255829, + "regularize": 0.05939151346683502, + "step": 735, + "wo_beta": 16.905290603637695 + }, + { + "dpo_loss": 0.6903797388076782, + "epoch": 2.0973075106282475, + "grad_norm": 25.718973789328345, + "learning_rate": 2.490459770759398e-07, + "logits": -1.2478386163711548, + "logps": -79.14292907714844, + "loss": 0.0573, + "objective": 0.05540405213832855, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 0.05540405213832855, + "step": 740, + "wo_beta": 15.3594331741333 + }, + { + "dpo_loss": 0.6952056288719177, + "epoch": 2.1114785073216815, + "grad_norm": 26.12282917762503, + "learning_rate": 2.419300033259798e-07, + "logits": -1.1640416383743286, + "logps": -79.09960174560547, + "loss": 0.0628, + "objective": 0.0631415918469429, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.5541666746139526, + "regularize": 0.0631415918469429, + "step": 745, + "wo_beta": 14.359167098999023 + }, + { + "dpo_loss": 0.6888077259063721, + "epoch": 2.1256495040151155, + "grad_norm": 25.53259897003242, + "learning_rate": 2.3488458333629773e-07, + "logits": -1.2182810306549072, + "logps": -78.26011657714844, + "loss": 0.0546, + "objective": 0.05781084671616554, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 0.05781084671616554, + "step": 750, + "wo_beta": 15.271900177001953 + }, + { + "epoch": 2.1256495040151155, + "eval_dpo_loss": 0.6980140209197998, + "eval_logits": -1.2232871055603027, + "eval_logps": -85.46907806396484, + "eval_loss": 0.14231154322624207, + "eval_objective": 0.14071756601333618, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5258799195289612, + "eval_regularize": 0.14071756601333618, + "eval_runtime": 525.9214, + "eval_samples_per_second": 11.009, + "eval_steps_per_second": 0.918, + "eval_wo_beta": 15.648022651672363, + "step": 750 + }, + { + "dpo_loss": 0.6901395320892334, + "epoch": 2.13982050070855, + "grad_norm": 25.775133405076527, + "learning_rate": 2.2791164325437046e-07, + "logits": -1.2039532661437988, + "logps": -80.76856994628906, + "loss": 0.0536, + "objective": 0.054485421627759933, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5249999761581421, + "regularize": 0.054485421627759933, + "step": 755, + "wo_beta": 16.363035202026367 + }, + { + "dpo_loss": 0.6922858953475952, + "epoch": 2.153991497401984, + "grad_norm": 25.744794188993545, + "learning_rate": 2.21013089412392e-07, + "logits": -1.1505485773086548, + "logps": -77.95565795898438, + "loss": 0.0596, + "objective": 0.056366052478551865, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5, + "regularize": 0.056366052478551865, + "step": 760, + "wo_beta": 14.503907203674316 + }, + { + "dpo_loss": 0.6935012936592102, + "epoch": 2.168162494095418, + "grad_norm": 25.81314805277084, + "learning_rate": 2.1419080780610122e-07, + "logits": -1.195157527923584, + "logps": -79.0260009765625, + "loss": 0.0569, + "objective": 0.05813807621598244, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.05813807621598244, + "step": 765, + "wo_beta": 15.846463203430176 + }, + { + "dpo_loss": 0.6929753422737122, + "epoch": 2.182333490788852, + "grad_norm": 26.17366253681256, + "learning_rate": 2.0744666357916925e-07, + "logits": -1.2156563997268677, + "logps": -79.0594253540039, + "loss": 0.0599, + "objective": 0.06166267395019531, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5083333253860474, + "regularize": 0.06166267395019531, + "step": 770, + "wo_beta": 13.665863037109375 + }, + { + "dpo_loss": 0.6904846429824829, + "epoch": 2.196504487482286, + "grad_norm": 24.80909315966262, + "learning_rate": 2.0078250051328782e-07, + "logits": -1.20059072971344, + "logps": -79.86570739746094, + "loss": 0.0593, + "objective": 0.05707041174173355, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.574999988079071, + "regularize": 0.05707041174173355, + "step": 775, + "wo_beta": 16.53993797302246 + }, + { + "dpo_loss": 0.6920241117477417, + "epoch": 2.21067548417572, + "grad_norm": 26.21741329158667, + "learning_rate": 1.942001405240979e-07, + "logits": -1.1453113555908203, + "logps": -79.6847152709961, + "loss": 0.0544, + "objective": 0.05578133091330528, + "ranking_idealized": 0.512499988079071, + "ranking_idealized_expo": 0.44583332538604736, + "ranking_simple": 0.4375, + "regularize": 0.05578133091330528, + "step": 780, + "wo_beta": 15.170312881469727 + }, + { + "dpo_loss": 0.6935942769050598, + "epoch": 2.2248464808691546, + "grad_norm": 28.321911906643972, + "learning_rate": 1.877013831630961e-07, + "logits": -1.1368038654327393, + "logps": -79.92477416992188, + "loss": 0.0563, + "objective": 0.0578266978263855, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4791666567325592, + "regularize": 0.0578266978263855, + "step": 785, + "wo_beta": 14.784603118896484 + }, + { + "dpo_loss": 0.6887209415435791, + "epoch": 2.2390174775625886, + "grad_norm": 25.1538491328267, + "learning_rate": 1.812880051256551e-07, + "logits": -1.1384888887405396, + "logps": -80.59889221191406, + "loss": 0.0504, + "objective": 0.04905276745557785, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.550000011920929, + "regularize": 0.04905276745557785, + "step": 790, + "wo_beta": 14.593072891235352 + }, + { + "dpo_loss": 0.6941591501235962, + "epoch": 2.2531884742560226, + "grad_norm": 25.628975208912717, + "learning_rate": 1.7496175976529337e-07, + "logits": -1.1934906244277954, + "logps": -81.73139953613281, + "loss": 0.053, + "objective": 0.05859142541885376, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5, + "regularize": 0.05859142541885376, + "step": 795, + "wo_beta": 13.79269790649414 + }, + { + "dpo_loss": 0.6919682621955872, + "epoch": 2.2673594709494567, + "grad_norm": 26.293732850411818, + "learning_rate": 1.6872437661432516e-07, + "logits": -1.2084691524505615, + "logps": -80.88973999023438, + "loss": 0.0531, + "objective": 0.05279294773936272, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5166666507720947, + "regularize": 0.05279294773936272, + "step": 800, + "wo_beta": 14.340437889099121 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6981291174888611, + "eval_logits": -1.220612645149231, + "eval_logps": -86.13679504394531, + "eval_loss": 0.138593852519989, + "eval_objective": 0.13714565336704254, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.13714565336704254, + "eval_runtime": 503.3749, + "eval_samples_per_second": 11.502, + "eval_steps_per_second": 0.96, + "eval_wo_beta": 15.623366355895996, + "step": 800 + }, + { + "dpo_loss": 0.6895002126693726, + "epoch": 2.2815304676428907, + "grad_norm": 26.85852458075238, + "learning_rate": 1.62577560911024e-07, + "logits": -1.1975409984588623, + "logps": -79.75126647949219, + "loss": 0.0473, + "objective": 0.047933317720890045, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5166666507720947, + "regularize": 0.047933317720890045, + "step": 805, + "wo_beta": 15.455560684204102 + }, + { + "dpo_loss": 0.693041980266571, + "epoch": 2.295701464336325, + "grad_norm": 27.109828632522476, + "learning_rate": 1.565229931334277e-07, + "logits": -1.2860682010650635, + "logps": -79.39039611816406, + "loss": 0.051, + "objective": 0.04613161459565163, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5458333492279053, + "regularize": 0.04613161459565163, + "step": 810, + "wo_beta": 13.837719917297363 + }, + { + "dpo_loss": 0.6961421966552734, + "epoch": 2.309872461029759, + "grad_norm": 26.77232369418631, + "learning_rate": 1.5056232853991208e-07, + "logits": -1.2426903247833252, + "logps": -80.33802032470703, + "loss": 0.0483, + "objective": 0.04774492606520653, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.4625000059604645, + "regularize": 0.04774492606520653, + "step": 815, + "wo_beta": 15.377904891967773 + }, + { + "dpo_loss": 0.6943568587303162, + "epoch": 2.324043457723193, + "grad_norm": 25.84415791966093, + "learning_rate": 1.4469719671666043e-07, + "logits": -1.1784952878952026, + "logps": -79.52135467529297, + "loss": 0.0497, + "objective": 0.0464615561068058, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5041666626930237, + "regularize": 0.0464615561068058, + "step": 820, + "wo_beta": 14.641592979431152 + }, + { + "dpo_loss": 0.6941722631454468, + "epoch": 2.3382144544166272, + "grad_norm": 26.057445300358456, + "learning_rate": 1.389292011321498e-07, + "logits": -1.1956678628921509, + "logps": -78.97592163085938, + "loss": 0.0489, + "objective": 0.04843177646398544, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5375000238418579, + "regularize": 0.04843177646398544, + "step": 825, + "wo_beta": 15.882107734680176 + }, + { + "dpo_loss": 0.6919335722923279, + "epoch": 2.3523854511100613, + "grad_norm": 25.587425832586177, + "learning_rate": 1.3325991869878012e-07, + "logits": -1.1966559886932373, + "logps": -81.00519561767578, + "loss": 0.0487, + "objective": 0.05618049576878548, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.05618049576878548, + "step": 830, + "wo_beta": 15.746501922607422 + }, + { + "dpo_loss": 0.6917215585708618, + "epoch": 2.3665564478034957, + "grad_norm": 25.756644403885232, + "learning_rate": 1.2769089934176126e-07, + "logits": -1.168601632118225, + "logps": -80.84972381591797, + "loss": 0.0488, + "objective": 0.052498627454042435, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.512499988079071, + "regularize": 0.052498627454042435, + "step": 835, + "wo_beta": 14.608040809631348 + }, + { + "dpo_loss": 0.6898554563522339, + "epoch": 2.3807274444969297, + "grad_norm": 25.072094771225707, + "learning_rate": 1.222236655753791e-07, + "logits": -1.1249865293502808, + "logps": -80.45842742919922, + "loss": 0.0434, + "objective": 0.04277409613132477, + "ranking_idealized": 0.5249999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4625000059604645, + "regularize": 0.04277409613132477, + "step": 840, + "wo_beta": 16.011308670043945 + }, + { + "dpo_loss": 0.6897058486938477, + "epoch": 2.3948984411903638, + "grad_norm": 31.2138593781791, + "learning_rate": 1.1685971208675538e-07, + "logits": -1.1826022863388062, + "logps": -81.36385345458984, + "loss": 0.0438, + "objective": 0.04376084357500076, + "ranking_idealized": 0.5958333611488342, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5333333611488342, + "regularize": 0.04376084357500076, + "step": 845, + "wo_beta": 15.694497108459473 + }, + { + "dpo_loss": 0.689830482006073, + "epoch": 2.409069437883798, + "grad_norm": 26.424193566129606, + "learning_rate": 1.1160050532721527e-07, + "logits": -1.2078933715820312, + "logps": -79.71755981445312, + "loss": 0.0444, + "objective": 0.04779530316591263, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.49166667461395264, + "regularize": 0.04779530316591263, + "step": 850, + "wo_beta": 15.619561195373535 + }, + { + "epoch": 2.409069437883798, + "eval_dpo_loss": 0.6980399489402771, + "eval_logits": -1.2270959615707397, + "eval_logps": -86.03622436523438, + "eval_loss": 0.13948112726211548, + "eval_objective": 0.1381867229938507, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.1381867229938507, + "eval_runtime": 508.2715, + "eval_samples_per_second": 11.392, + "eval_steps_per_second": 0.95, + "eval_wo_beta": 15.647224426269531, + "step": 850 + }, + { + "dpo_loss": 0.690664529800415, + "epoch": 2.423240434577232, + "grad_norm": 26.086004792829357, + "learning_rate": 1.0644748311137375e-07, + "logits": -1.2208842039108276, + "logps": -79.23947143554688, + "loss": 0.0431, + "objective": 0.044093988835811615, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.5249999761581421, + "regularize": 0.044093988835811615, + "step": 855, + "wo_beta": 14.724575996398926 + }, + { + "dpo_loss": 0.6878847479820251, + "epoch": 2.4374114312706663, + "grad_norm": 24.819758120044014, + "learning_rate": 1.0140205422405212e-07, + "logits": -1.172597050666809, + "logps": -80.47863006591797, + "loss": 0.0425, + "objective": 0.044025711715221405, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5333333611488342, + "regularize": 0.044025711715221405, + "step": 860, + "wo_beta": 15.323599815368652 + }, + { + "dpo_loss": 0.6902381777763367, + "epoch": 2.4515824279641003, + "grad_norm": 27.313034441936136, + "learning_rate": 9.646559803512993e-08, + "logits": -1.2031606435775757, + "logps": -79.59320831298828, + "loss": 0.0444, + "objective": 0.04272008314728737, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.04272008314728737, + "step": 865, + "wo_beta": 15.875487327575684 + }, + { + "dpo_loss": 0.6910372376441956, + "epoch": 2.4657534246575343, + "grad_norm": 25.76666127477957, + "learning_rate": 9.163946412243895e-08, + "logits": -1.2454520463943481, + "logps": -80.33094024658203, + "loss": 0.0442, + "objective": 0.04635915905237198, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.04635915905237198, + "step": 870, + "wo_beta": 14.93254566192627 + }, + { + "dpo_loss": 0.6905195713043213, + "epoch": 2.4799244213509684, + "grad_norm": 25.65493367025704, + "learning_rate": 8.692497190280224e-08, + "logits": -1.193867802619934, + "logps": -79.73404693603516, + "loss": 0.044, + "objective": 0.04675581306219101, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.5625, + "regularize": 0.04675581306219101, + "step": 875, + "wo_beta": 16.489763259887695 + }, + { + "dpo_loss": 0.6905779242515564, + "epoch": 2.4940954180444024, + "grad_norm": 26.621663140091542, + "learning_rate": 8.232341027131883e-08, + "logits": -1.1066038608551025, + "logps": -79.80467224121094, + "loss": 0.0446, + "objective": 0.046583421528339386, + "ranking_idealized": 0.5625, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.046583421528339386, + "step": 880, + "wo_beta": 17.46852684020996 + }, + { + "dpo_loss": 0.6917292475700378, + "epoch": 2.5082664147378364, + "grad_norm": 24.02209120686893, + "learning_rate": 7.783603724899257e-08, + "logits": -1.25592041015625, + "logps": -79.1759262084961, + "loss": 0.0422, + "objective": 0.04294423386454582, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.04294423386454582, + "step": 885, + "wo_beta": 16.415306091308594 + }, + { + "dpo_loss": 0.6880825161933899, + "epoch": 2.5224374114312704, + "grad_norm": 26.181840029139675, + "learning_rate": 7.346407963880136e-08, + "logits": -1.1791417598724365, + "logps": -78.21730041503906, + "loss": 0.0424, + "objective": 0.03773224726319313, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5, + "regularize": 0.03773224726319313, + "step": 890, + "wo_beta": 13.494309425354004 + }, + { + "dpo_loss": 0.692958414554596, + "epoch": 2.536608408124705, + "grad_norm": 27.615133075738825, + "learning_rate": 6.92087326903022e-08, + "logits": -1.175589680671692, + "logps": -80.6869888305664, + "loss": 0.0444, + "objective": 0.0476791188120842, + "ranking_idealized": 0.5458333492279053, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.48750001192092896, + "regularize": 0.0476791188120842, + "step": 895, + "wo_beta": 16.41474151611328 + }, + { + "dpo_loss": 0.6935379505157471, + "epoch": 2.550779404818139, + "grad_norm": 25.263999580012257, + "learning_rate": 6.507115977286143e-08, + "logits": -1.1382538080215454, + "logps": -79.20881652832031, + "loss": 0.0438, + "objective": 0.044265471398830414, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5208333134651184, + "regularize": 0.044265471398830414, + "step": 900, + "wo_beta": 15.096195220947266 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.6975382566452026, + "eval_logits": -1.2295913696289062, + "eval_logps": -85.88396453857422, + "eval_loss": 0.13868437707424164, + "eval_objective": 0.13740767538547516, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.13740767538547516, + "eval_runtime": 525.8368, + "eval_samples_per_second": 11.011, + "eval_steps_per_second": 0.919, + "eval_wo_beta": 15.634546279907227, + "step": 900 + }, + { + "dpo_loss": 0.6917089819908142, + "epoch": 2.564950401511573, + "grad_norm": 25.44195334625603, + "learning_rate": 6.105249205760127e-08, + "logits": -1.2037063837051392, + "logps": -79.04875183105469, + "loss": 0.0411, + "objective": 0.03601410239934921, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 0.03601410239934921, + "step": 905, + "wo_beta": 14.861380577087402 + }, + { + "dpo_loss": 0.6932801008224487, + "epoch": 2.579121398205007, + "grad_norm": 26.495925146665332, + "learning_rate": 5.7153828208148846e-08, + "logits": -1.1827551126480103, + "logps": -81.922607421875, + "loss": 0.0424, + "objective": 0.04883956164121628, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.04883956164121628, + "step": 910, + "wo_beta": 15.852696418762207 + }, + { + "dpo_loss": 0.6898232102394104, + "epoch": 2.593292394898441, + "grad_norm": 25.88822340642525, + "learning_rate": 5.337623408027292e-08, + "logits": -1.2935634851455688, + "logps": -80.87789916992188, + "loss": 0.0403, + "objective": 0.040093984454870224, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.040093984454870224, + "step": 915, + "wo_beta": 14.905534744262695 + }, + { + "dpo_loss": 0.6920287013053894, + "epoch": 2.6074633915918755, + "grad_norm": 25.364010577767672, + "learning_rate": 4.972074243048896e-08, + "logits": -1.1468993425369263, + "logps": -79.89569854736328, + "loss": 0.0396, + "objective": 0.03967604413628578, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5249999761581421, + "regularize": 0.03967604413628578, + "step": 920, + "wo_beta": 15.246692657470703 + }, + { + "dpo_loss": 0.6928901076316833, + "epoch": 2.6216343882853095, + "grad_norm": 27.967184575096596, + "learning_rate": 4.6188352633713956e-08, + "logits": -1.1743673086166382, + "logps": -80.17101287841797, + "loss": 0.0417, + "objective": 0.04370425269007683, + "ranking_idealized": 0.574999988079071, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.47083333134651184, + "regularize": 0.04370425269007683, + "step": 925, + "wo_beta": 16.336292266845703 + }, + { + "dpo_loss": 0.688522458076477, + "epoch": 2.6358053849787435, + "grad_norm": 26.578359144982873, + "learning_rate": 4.2780030410047796e-08, + "logits": -1.1617387533187866, + "logps": -79.97476196289062, + "loss": 0.0365, + "objective": 0.03662450239062309, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.44583332538604736, + "ranking_simple": 0.44583332538604736, + "regularize": 0.03662450239062309, + "step": 930, + "wo_beta": 16.801166534423828 + }, + { + "dpo_loss": 0.6928302645683289, + "epoch": 2.6499763816721775, + "grad_norm": 26.6756558913633, + "learning_rate": 3.949670756075446e-08, + "logits": -1.1548212766647339, + "logps": -78.78431701660156, + "loss": 0.0364, + "objective": 0.0356716513633728, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.574999988079071, + "regularize": 0.0356716513633728, + "step": 935, + "wo_beta": 15.733369827270508 + }, + { + "dpo_loss": 0.6884638071060181, + "epoch": 2.6641473783656116, + "grad_norm": 26.11837122854028, + "learning_rate": 3.63392817135173e-08, + "logits": -1.213140845298767, + "logps": -81.39899444580078, + "loss": 0.0357, + "objective": 0.03838236257433891, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5333333611488342, + "regularize": 0.03838236257433891, + "step": 940, + "wo_beta": 16.71453094482422 + }, + { + "dpo_loss": 0.6904810070991516, + "epoch": 2.678318375059046, + "grad_norm": 26.48243005501328, + "learning_rate": 3.330861607703611e-08, + "logits": -1.2477443218231201, + "logps": -80.07948303222656, + "loss": 0.0369, + "objective": 0.03517834097146988, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.03517834097146988, + "step": 945, + "wo_beta": 15.665254592895508 + }, + { + "dpo_loss": 0.6894643902778625, + "epoch": 2.69248937175248, + "grad_norm": 26.269248260275482, + "learning_rate": 3.040553920503502e-08, + "logits": -1.1376032829284668, + "logps": -80.89375305175781, + "loss": 0.0384, + "objective": 0.03873926401138306, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5416666865348816, + "regularize": 0.03873926401138306, + "step": 950, + "wo_beta": 14.65186882019043 + }, + { + "epoch": 2.69248937175248, + "eval_dpo_loss": 0.6974536180496216, + "eval_logits": -1.2285144329071045, + "eval_logps": -85.95899963378906, + "eval_loss": 0.13796193897724152, + "eval_objective": 0.13680347800254822, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.523809552192688, + "eval_regularize": 0.13680347800254822, + "eval_runtime": 502.396, + "eval_samples_per_second": 11.525, + "eval_steps_per_second": 0.961, + "eval_wo_beta": 15.642508506774902, + "step": 950 + }, + { + "dpo_loss": 0.6890572905540466, + "epoch": 2.706660368445914, + "grad_norm": 24.74397275822761, + "learning_rate": 2.7630844769743756e-08, + "logits": -1.2225416898727417, + "logps": -79.87822723388672, + "loss": 0.0403, + "objective": 0.04285174608230591, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.4749999940395355, + "regularize": 0.04285174608230591, + "step": 955, + "wo_beta": 13.80820369720459 + }, + { + "dpo_loss": 0.6908868551254272, + "epoch": 2.720831365139348, + "grad_norm": 25.907101929875015, + "learning_rate": 2.4985291344915673e-08, + "logits": -1.1964094638824463, + "logps": -79.958740234375, + "loss": 0.0384, + "objective": 0.03498096391558647, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5041666626930237, + "regularize": 0.03498096391558647, + "step": 960, + "wo_beta": 16.096843719482422 + }, + { + "dpo_loss": 0.6898122429847717, + "epoch": 2.735002361832782, + "grad_norm": 26.015895295989438, + "learning_rate": 2.2469602198441573e-08, + "logits": -1.2220391035079956, + "logps": -80.10702514648438, + "loss": 0.0368, + "objective": 0.03775167092680931, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5666666626930237, + "regularize": 0.03775167092680931, + "step": 965, + "wo_beta": 14.61376953125 + }, + { + "dpo_loss": 0.6917709112167358, + "epoch": 2.7491733585262166, + "grad_norm": 24.33103792831753, + "learning_rate": 2.008446509461498e-08, + "logits": -1.2293510437011719, + "logps": -81.0619888305664, + "loss": 0.0341, + "objective": 0.03296136483550072, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5208333134651184, + "regularize": 0.03296136483550072, + "step": 970, + "wo_beta": 14.957200050354004 + }, + { + "dpo_loss": 0.6909447908401489, + "epoch": 2.7633443552196506, + "grad_norm": 24.892680282575437, + "learning_rate": 1.7830532106104746e-08, + "logits": -1.1391520500183105, + "logps": -79.50247955322266, + "loss": 0.0358, + "objective": 0.03571467101573944, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5416666865348816, + "regularize": 0.03571467101573944, + "step": 975, + "wo_beta": 15.747049331665039 + }, + { + "dpo_loss": 0.6906387209892273, + "epoch": 2.7775153519130846, + "grad_norm": 25.891776024282194, + "learning_rate": 1.570841943568446e-08, + "logits": -1.2599250078201294, + "logps": -78.82478332519531, + "loss": 0.0365, + "objective": 0.03682435303926468, + "ranking_idealized": 0.5916666388511658, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.49166667461395264, + "regularize": 0.03682435303926468, + "step": 980, + "wo_beta": 14.397340774536133 + }, + { + "dpo_loss": 0.6933729648590088, + "epoch": 2.7916863486065187, + "grad_norm": 24.71596998222205, + "learning_rate": 1.3718707247769134e-08, + "logits": -1.1248877048492432, + "logps": -77.72516632080078, + "loss": 0.038, + "objective": 0.03822270780801773, + "ranking_idealized": 0.5791666507720947, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.48750001192092896, + "regularize": 0.03822270780801773, + "step": 985, + "wo_beta": 14.327728271484375 + }, + { + "dpo_loss": 0.691889762878418, + "epoch": 2.8058573452999527, + "grad_norm": 26.185929406261582, + "learning_rate": 1.1861939509803686e-08, + "logits": -1.1771855354309082, + "logps": -81.14643859863281, + "loss": 0.0369, + "objective": 0.036898624151945114, + "ranking_idealized": 0.5666666626930237, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.47083333134651184, + "regularize": 0.036898624151945114, + "step": 990, + "wo_beta": 15.375889778137207 + }, + { + "dpo_loss": 0.6891864538192749, + "epoch": 2.820028341993387, + "grad_norm": 24.803225677825235, + "learning_rate": 1.0138623843548078e-08, + "logits": -1.2396986484527588, + "logps": -79.1412353515625, + "loss": 0.0365, + "objective": 0.04024568572640419, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5458333492279053, + "regularize": 0.04024568572640419, + "step": 995, + "wo_beta": 16.440141677856445 + }, + { + "dpo_loss": 0.6907335519790649, + "epoch": 2.8341993386868207, + "grad_norm": 24.80804716491088, + "learning_rate": 8.54923138629815e-09, + "logits": -1.1814649105072021, + "logps": -78.3318862915039, + "loss": 0.0375, + "objective": 0.03398551046848297, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5208333134651184, + "regularize": 0.03398551046848297, + "step": 1000, + "wo_beta": 14.515811920166016 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.6973779201507568, + "eval_logits": -1.2304595708847046, + "eval_logps": -85.99760437011719, + "eval_loss": 0.1379886120557785, + "eval_objective": 0.1368565410375595, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.1368565410375595, + "eval_runtime": 504.9482, + "eval_samples_per_second": 11.467, + "eval_steps_per_second": 0.957, + "eval_wo_beta": 15.63548755645752, + "step": 1000 + }, + { + "dpo_loss": 0.6911761164665222, + "epoch": 2.848370335380255, + "grad_norm": 27.32667601221845, + "learning_rate": 7.09419666208183e-09, + "logits": -1.1803662776947021, + "logps": -78.7650375366211, + "loss": 0.036, + "objective": 0.03725501522421837, + "ranking_idealized": 0.6000000238418579, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5208333134651184, + "regularize": 0.03725501522421837, + "step": 1005, + "wo_beta": 15.2937593460083 + }, + { + "dpo_loss": 0.6888595223426819, + "epoch": 2.862541332073689, + "grad_norm": 26.14400831689978, + "learning_rate": 5.773917462864264e-09, + "logits": -1.2407745122909546, + "logps": -79.07453918457031, + "loss": 0.0359, + "objective": 0.03689141198992729, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5291666388511658, + "regularize": 0.03689141198992729, + "step": 1010, + "wo_beta": 15.180621147155762 + }, + { + "dpo_loss": 0.6912004947662354, + "epoch": 2.8767123287671232, + "grad_norm": 24.9602315307722, + "learning_rate": 4.588754739795586e-09, + "logits": -1.1721571683883667, + "logps": -78.31599426269531, + "loss": 0.0354, + "objective": 0.03823023661971092, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.550000011920929, + "regularize": 0.03823023661971092, + "step": 1015, + "wo_beta": 14.313817977905273 + }, + { + "dpo_loss": 0.6896302700042725, + "epoch": 2.8908833254605573, + "grad_norm": 24.85258883289883, + "learning_rate": 3.53903250453047e-09, + "logits": -1.1410295963287354, + "logps": -80.05741882324219, + "loss": 0.0343, + "objective": 0.03470051661133766, + "ranking_idealized": 0.5833333134651184, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5166666507720947, + "regularize": 0.03470051661133766, + "step": 1020, + "wo_beta": 17.722339630126953 + }, + { + "dpo_loss": 0.6912213563919067, + "epoch": 2.9050543221539913, + "grad_norm": 25.437671735836517, + "learning_rate": 2.6250377406467627e-09, + "logits": -1.2291027307510376, + "logps": -80.00859832763672, + "loss": 0.0379, + "objective": 0.037315838038921356, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5041666626930237, + "regularize": 0.037315838038921356, + "step": 1025, + "wo_beta": 14.656061172485352 + }, + { + "dpo_loss": 0.6911433935165405, + "epoch": 2.9192253188474258, + "grad_norm": 24.681518212372314, + "learning_rate": 1.8470203251865768e-09, + "logits": -1.2523103952407837, + "logps": -80.20305633544922, + "loss": 0.035, + "objective": 0.03597547858953476, + "ranking_idealized": 0.5416666865348816, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4791666567325592, + "regularize": 0.03597547858953476, + "step": 1030, + "wo_beta": 16.243247985839844 + }, + { + "dpo_loss": 0.6904833316802979, + "epoch": 2.9333963155408598, + "grad_norm": 26.808499612926756, + "learning_rate": 1.2051929603428823e-09, + "logits": -1.2276477813720703, + "logps": -80.6124496459961, + "loss": 0.0344, + "objective": 0.03077917918562889, + "ranking_idealized": 0.625, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5041666626930237, + "regularize": 0.03077917918562889, + "step": 1035, + "wo_beta": 14.297567367553711 + }, + { + "dpo_loss": 0.6902684569358826, + "epoch": 2.947567312234294, + "grad_norm": 24.71043561481991, + "learning_rate": 6.997311153086882e-10, + "logits": -1.227773904800415, + "logps": -80.38175201416016, + "loss": 0.0364, + "objective": 0.036134228110313416, + "ranking_idealized": 0.5708333253860474, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.48750001192092896, + "regularize": 0.036134228110313416, + "step": 1040, + "wo_beta": 16.110403060913086 + }, + { + "dpo_loss": 0.6894943118095398, + "epoch": 2.961738308927728, + "grad_norm": 26.305013618654215, + "learning_rate": 3.3077297830541585e-10, + "logits": -1.1821495294570923, + "logps": -81.93363189697266, + "loss": 0.0371, + "objective": 0.04041092470288277, + "ranking_idealized": 0.6208333373069763, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5541666746139526, + "regularize": 0.04041092470288277, + "step": 1045, + "wo_beta": 17.30424690246582 + }, + { + "dpo_loss": 0.6926708221435547, + "epoch": 2.975909305621162, + "grad_norm": 27.660126015515125, + "learning_rate": 9.841941880361914e-11, + "logits": -1.2283350229263306, + "logps": -78.42631530761719, + "loss": 0.0397, + "objective": 0.03637199103832245, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.550000011920929, + "regularize": 0.03637199103832245, + "step": 1050, + "wo_beta": 14.132574081420898 + }, + { + "epoch": 2.975909305621162, + "eval_dpo_loss": 0.697369396686554, + "eval_logits": -1.230570673942566, + "eval_logps": -85.98023223876953, + "eval_loss": 0.13814175128936768, + "eval_objective": 0.13700547814369202, + "eval_ranking_idealized": 0.6024844646453857, + "eval_ranking_idealized_expo": 0.5232919454574585, + "eval_ranking_simple": 0.5243270993232727, + "eval_regularize": 0.13700547814369202, + "eval_runtime": 530.5394, + "eval_samples_per_second": 10.913, + "eval_steps_per_second": 0.91, + "eval_wo_beta": 15.63470458984375, + "step": 1050 + }, + { + "dpo_loss": 0.689972996711731, + "epoch": 2.9900803023145963, + "grad_norm": 25.71242634224602, + "learning_rate": 2.7339599464326622e-12, + "logits": -1.2016465663909912, + "logps": -79.08844757080078, + "loss": 0.0389, + "objective": 0.03705615550279617, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5541666746139526, + "regularize": 0.03705615550279617, + "step": 1055, + "wo_beta": 14.549761772155762 + }, + { + "epoch": 2.992914501653283, + "step": 1056, + "total_flos": 0.0, + "train_loss": 0.08480868444806247, + "train_runtime": 47353.1169, + "train_samples_per_second": 3.218, + "train_steps_per_second": 0.022 + } + ], + "logging_steps": 5, + "max_steps": 1056, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}