|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.992914501653283, |
|
"eval_steps": 50, |
|
"global_step": 1056, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 36.794102305076855, |
|
"learning_rate": 9.433962264150943e-09, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 0.0051, |
|
"objective": 0.0046141319908201694, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.0046141319908201694, |
|
"step": 1, |
|
"wo_beta": 14.841486930847168 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930367350578308, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 51.56528279298989, |
|
"learning_rate": 4.7169811320754715e-08, |
|
"logits": -1.4291929006576538, |
|
"logps": -83.85256958007812, |
|
"loss": 0.0058, |
|
"objective": 0.005918528418987989, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4895833432674408, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.005918528418987989, |
|
"step": 5, |
|
"wo_beta": 16.667278289794922 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930564641952515, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 43.62540826850091, |
|
"learning_rate": 9.433962264150943e-08, |
|
"logits": -1.4014313220977783, |
|
"logps": -84.90540313720703, |
|
"loss": 0.0065, |
|
"objective": 0.00607979716733098, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.00607979716733098, |
|
"step": 10, |
|
"wo_beta": 15.295255661010742 |
|
}, |
|
{ |
|
"dpo_loss": 0.691772997379303, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 40.579476886356176, |
|
"learning_rate": 1.4150943396226414e-07, |
|
"logits": -1.5395350456237793, |
|
"logps": -84.67674255371094, |
|
"loss": 0.0077, |
|
"objective": 0.007744006346911192, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.007744006346911192, |
|
"step": 15, |
|
"wo_beta": 15.72358512878418 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908682584762573, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 38.45055261776428, |
|
"learning_rate": 1.8867924528301886e-07, |
|
"logits": -1.3619084358215332, |
|
"logps": -83.87267303466797, |
|
"loss": 0.0106, |
|
"objective": 0.011018705554306507, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.011018705554306507, |
|
"step": 20, |
|
"wo_beta": 16.501863479614258 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917246580123901, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 37.49075261903623, |
|
"learning_rate": 2.3584905660377358e-07, |
|
"logits": -1.366659164428711, |
|
"logps": -84.04557037353516, |
|
"loss": 0.0144, |
|
"objective": 0.012653553858399391, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.012653553858399391, |
|
"step": 25, |
|
"wo_beta": 15.649717330932617 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906312704086304, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 35.42831042318107, |
|
"learning_rate": 2.830188679245283e-07, |
|
"logits": -1.4202715158462524, |
|
"logps": -84.00289154052734, |
|
"loss": 0.0156, |
|
"objective": 0.015595527365803719, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.015595527365803719, |
|
"step": 30, |
|
"wo_beta": 16.955543518066406 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931964755058289, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 37.327321600930496, |
|
"learning_rate": 3.30188679245283e-07, |
|
"logits": -1.3935037851333618, |
|
"logps": -83.39187622070312, |
|
"loss": 0.0202, |
|
"objective": 0.021191226318478584, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.021191226318478584, |
|
"step": 35, |
|
"wo_beta": 16.169347763061523 |
|
}, |
|
{ |
|
"dpo_loss": 0.693729817867279, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 41.6880498675233, |
|
"learning_rate": 3.773584905660377e-07, |
|
"logits": -1.381697177886963, |
|
"logps": -83.91118621826172, |
|
"loss": 0.0228, |
|
"objective": 0.02042653225362301, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.02042653225362301, |
|
"step": 40, |
|
"wo_beta": 14.309080123901367 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919765472412109, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 41.11048762433909, |
|
"learning_rate": 4.2452830188679244e-07, |
|
"logits": -1.3955552577972412, |
|
"logps": -84.25520324707031, |
|
"loss": 0.027, |
|
"objective": 0.025382202118635178, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.025382202118635178, |
|
"step": 45, |
|
"wo_beta": 14.21595287322998 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911224722862244, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 41.07625280062658, |
|
"learning_rate": 4.7169811320754717e-07, |
|
"logits": -1.4127604961395264, |
|
"logps": -85.3918685913086, |
|
"loss": 0.0351, |
|
"objective": 0.03202561289072037, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03202561289072037, |
|
"step": 50, |
|
"wo_beta": 15.589811325073242 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6926834583282471, |
|
"eval_logits": -1.391736626625061, |
|
"eval_logps": -91.23294067382812, |
|
"eval_loss": 0.02213538996875286, |
|
"eval_objective": 0.022384027019143105, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5212215185165405, |
|
"eval_regularize": 0.022384027019143105, |
|
"eval_runtime": 470.1076, |
|
"eval_samples_per_second": 12.316, |
|
"eval_steps_per_second": 1.027, |
|
"eval_wo_beta": 16.221710205078125, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6922997832298279, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 36.466581476765526, |
|
"learning_rate": 5.188679245283019e-07, |
|
"logits": -1.3620656728744507, |
|
"logps": -84.91451263427734, |
|
"loss": 0.0367, |
|
"objective": 0.0405682697892189, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.0405682697892189, |
|
"step": 55, |
|
"wo_beta": 15.095004081726074 |
|
}, |
|
{ |
|
"dpo_loss": 0.6875351071357727, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 36.25782748515131, |
|
"learning_rate": 5.660377358490566e-07, |
|
"logits": -1.28928804397583, |
|
"logps": -85.71366119384766, |
|
"loss": 0.0403, |
|
"objective": 0.04035286232829094, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.04035286232829094, |
|
"step": 60, |
|
"wo_beta": 14.607115745544434 |
|
}, |
|
{ |
|
"dpo_loss": 0.6947705149650574, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 41.25867915272223, |
|
"learning_rate": 6.132075471698112e-07, |
|
"logits": -1.3798266649246216, |
|
"logps": -83.1692123413086, |
|
"loss": 0.0491, |
|
"objective": 0.050007414072752, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.050007414072752, |
|
"step": 65, |
|
"wo_beta": 14.976885795593262 |
|
}, |
|
{ |
|
"dpo_loss": 0.6880966424942017, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 35.20333705483616, |
|
"learning_rate": 6.60377358490566e-07, |
|
"logits": -1.4017753601074219, |
|
"logps": -85.73289489746094, |
|
"loss": 0.0551, |
|
"objective": 0.059768859297037125, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.059768859297037125, |
|
"step": 70, |
|
"wo_beta": 15.204180717468262 |
|
}, |
|
{ |
|
"dpo_loss": 0.6949416995048523, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 35.61853042350494, |
|
"learning_rate": 7.075471698113207e-07, |
|
"logits": -1.321311593055725, |
|
"logps": -85.34779357910156, |
|
"loss": 0.0579, |
|
"objective": 0.06061805784702301, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.06061805784702301, |
|
"step": 75, |
|
"wo_beta": 14.980683326721191 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930631995201111, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 34.9536345678453, |
|
"learning_rate": 7.547169811320754e-07, |
|
"logits": -1.4264112710952759, |
|
"logps": -84.01344299316406, |
|
"loss": 0.0626, |
|
"objective": 0.062408361583948135, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.062408361583948135, |
|
"step": 80, |
|
"wo_beta": 16.357084274291992 |
|
}, |
|
{ |
|
"dpo_loss": 0.6939026117324829, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 35.4653089608865, |
|
"learning_rate": 8.018867924528302e-07, |
|
"logits": -1.4041804075241089, |
|
"logps": -83.52224731445312, |
|
"loss": 0.0695, |
|
"objective": 0.07861108332872391, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.07861108332872391, |
|
"step": 85, |
|
"wo_beta": 14.987756729125977 |
|
}, |
|
{ |
|
"dpo_loss": 0.6886675357818604, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 38.910010820592774, |
|
"learning_rate": 8.490566037735849e-07, |
|
"logits": -1.5007805824279785, |
|
"logps": -84.52466583251953, |
|
"loss": 0.0806, |
|
"objective": 0.08859896659851074, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.08859896659851074, |
|
"step": 90, |
|
"wo_beta": 15.482732772827148 |
|
}, |
|
{ |
|
"dpo_loss": 0.6961393356323242, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 38.50762322649532, |
|
"learning_rate": 8.962264150943396e-07, |
|
"logits": -1.4152452945709229, |
|
"logps": -83.7827377319336, |
|
"loss": 0.0851, |
|
"objective": 0.08412078768014908, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.08412078768014908, |
|
"step": 95, |
|
"wo_beta": 16.229019165039062 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928918361663818, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 34.07886171444254, |
|
"learning_rate": 9.433962264150943e-07, |
|
"logits": -1.2942625284194946, |
|
"logps": -81.22164916992188, |
|
"loss": 0.0877, |
|
"objective": 0.08352937549352646, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.08352937549352646, |
|
"step": 100, |
|
"wo_beta": 15.187151908874512 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6921994090080261, |
|
"eval_logits": -1.3862521648406982, |
|
"eval_logps": -88.66019439697266, |
|
"eval_loss": 0.04334083944559097, |
|
"eval_objective": 0.04473063722252846, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.04473063722252846, |
|
"eval_runtime": 472.2375, |
|
"eval_samples_per_second": 12.261, |
|
"eval_steps_per_second": 1.023, |
|
"eval_wo_beta": 16.16818618774414, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6989858150482178, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 34.124768906394316, |
|
"learning_rate": 9.90566037735849e-07, |
|
"logits": -1.4883809089660645, |
|
"logps": -83.63202667236328, |
|
"loss": 0.0937, |
|
"objective": 0.10326550155878067, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.10326550155878067, |
|
"step": 105, |
|
"wo_beta": 14.697186470031738 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916998624801636, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 36.53960499520599, |
|
"learning_rate": 9.99956257238817e-07, |
|
"logits": -1.3666936159133911, |
|
"logps": -82.67723083496094, |
|
"loss": 0.1009, |
|
"objective": 0.09831760078668594, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.09831760078668594, |
|
"step": 110, |
|
"wo_beta": 14.75289249420166 |
|
}, |
|
{ |
|
"dpo_loss": 0.6972029805183411, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 32.3431868996238, |
|
"learning_rate": 9.997785653888834e-07, |
|
"logits": -1.351915955543518, |
|
"logps": -82.5732650756836, |
|
"loss": 0.1062, |
|
"objective": 0.10171337425708771, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.10171337425708771, |
|
"step": 115, |
|
"wo_beta": 16.003950119018555 |
|
}, |
|
{ |
|
"dpo_loss": 0.6885399222373962, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 35.92878266852989, |
|
"learning_rate": 9.994642390694308e-07, |
|
"logits": -1.367909550666809, |
|
"logps": -82.90719604492188, |
|
"loss": 0.1098, |
|
"objective": 0.11067435145378113, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.11067435145378113, |
|
"step": 120, |
|
"wo_beta": 15.639138221740723 |
|
}, |
|
{ |
|
"dpo_loss": 0.6936843395233154, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 30.26276247254467, |
|
"learning_rate": 9.990133642141357e-07, |
|
"logits": -1.3929860591888428, |
|
"logps": -85.65290069580078, |
|
"loss": 0.1056, |
|
"objective": 0.11743973940610886, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.11743973940610886, |
|
"step": 125, |
|
"wo_beta": 15.93514633178711 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941003799438477, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 39.21461417787312, |
|
"learning_rate": 9.98426064087682e-07, |
|
"logits": -1.3525993824005127, |
|
"logps": -83.56419372558594, |
|
"loss": 0.1211, |
|
"objective": 0.11899420619010925, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.11899420619010925, |
|
"step": 130, |
|
"wo_beta": 16.0157527923584 |
|
}, |
|
{ |
|
"dpo_loss": 0.6882577538490295, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 32.67768184928008, |
|
"learning_rate": 9.977024992520601e-07, |
|
"logits": -1.3901729583740234, |
|
"logps": -84.39146423339844, |
|
"loss": 0.1253, |
|
"objective": 0.12414517998695374, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.12414517998695374, |
|
"step": 135, |
|
"wo_beta": 14.371219635009766 |
|
}, |
|
{ |
|
"dpo_loss": 0.6830641627311707, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 33.07732649314307, |
|
"learning_rate": 9.968428675226713e-07, |
|
"logits": -1.3437649011611938, |
|
"logps": -85.44697570800781, |
|
"loss": 0.1248, |
|
"objective": 0.12058641016483307, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.12058641016483307, |
|
"step": 140, |
|
"wo_beta": 14.547070503234863 |
|
}, |
|
{ |
|
"dpo_loss": 0.6851420998573303, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 30.784646211601874, |
|
"learning_rate": 9.958474039142469e-07, |
|
"logits": -1.3567951917648315, |
|
"logps": -86.4469223022461, |
|
"loss": 0.1319, |
|
"objective": 0.13056445121765137, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.13056445121765137, |
|
"step": 145, |
|
"wo_beta": 13.91884994506836 |
|
}, |
|
{ |
|
"dpo_loss": 0.6960374116897583, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 32.05337681597037, |
|
"learning_rate": 9.947163805765979e-07, |
|
"logits": -1.3565360307693481, |
|
"logps": -86.30919647216797, |
|
"loss": 0.1323, |
|
"objective": 0.12925057113170624, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.12925057113170624, |
|
"step": 150, |
|
"wo_beta": 16.796695709228516 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.695567786693573, |
|
"eval_logits": -1.3053797483444214, |
|
"eval_logps": -90.43773651123047, |
|
"eval_loss": 0.07677316665649414, |
|
"eval_objective": 0.07639209181070328, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5222567319869995, |
|
"eval_regularize": 0.07639209181070328, |
|
"eval_runtime": 526.1958, |
|
"eval_samples_per_second": 11.004, |
|
"eval_steps_per_second": 0.918, |
|
"eval_wo_beta": 16.003387451171875, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933045983314514, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 31.605620123374155, |
|
"learning_rate": 9.934501067202117e-07, |
|
"logits": -1.3933676481246948, |
|
"logps": -83.03238677978516, |
|
"loss": 0.1358, |
|
"objective": 0.1285211592912674, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.1285211592912674, |
|
"step": 155, |
|
"wo_beta": 15.31113338470459 |
|
}, |
|
{ |
|
"dpo_loss": 0.6946766972541809, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 32.22880904067845, |
|
"learning_rate": 9.92048928531717e-07, |
|
"logits": -1.2931861877441406, |
|
"logps": -83.0308837890625, |
|
"loss": 0.1338, |
|
"objective": 0.12377996742725372, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.12377996742725372, |
|
"step": 160, |
|
"wo_beta": 14.51412296295166 |
|
}, |
|
{ |
|
"dpo_loss": 0.6858457326889038, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 28.56289647538006, |
|
"learning_rate": 9.905132290792392e-07, |
|
"logits": -1.3845534324645996, |
|
"logps": -84.35334777832031, |
|
"loss": 0.1295, |
|
"objective": 0.13048619031906128, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.13048619031906128, |
|
"step": 165, |
|
"wo_beta": 15.858311653137207 |
|
}, |
|
{ |
|
"dpo_loss": 0.6987485289573669, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 31.697158183348822, |
|
"learning_rate": 9.888434282076757e-07, |
|
"logits": -1.3974741697311401, |
|
"logps": -82.40156555175781, |
|
"loss": 0.1376, |
|
"objective": 0.14300216734409332, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.14300216734409332, |
|
"step": 170, |
|
"wo_beta": 15.730175018310547 |
|
}, |
|
{ |
|
"dpo_loss": 0.6993536353111267, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 30.951333756278135, |
|
"learning_rate": 9.870399824239114e-07, |
|
"logits": -1.2470077276229858, |
|
"logps": -83.35051727294922, |
|
"loss": 0.1401, |
|
"objective": 0.13475559651851654, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.13475559651851654, |
|
"step": 175, |
|
"wo_beta": 17.82953643798828 |
|
}, |
|
{ |
|
"dpo_loss": 0.6983634233474731, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 34.822921079044, |
|
"learning_rate": 9.851033847720164e-07, |
|
"logits": -1.2282413244247437, |
|
"logps": -83.51294708251953, |
|
"loss": 0.1442, |
|
"objective": 0.143393412232399, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 0.143393412232399, |
|
"step": 180, |
|
"wo_beta": 14.920221328735352 |
|
}, |
|
{ |
|
"dpo_loss": 0.6972795128822327, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 34.3447207787113, |
|
"learning_rate": 9.83034164698452e-07, |
|
"logits": -1.2574915885925293, |
|
"logps": -82.5478515625, |
|
"loss": 0.1382, |
|
"objective": 0.14230893552303314, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.14230893552303314, |
|
"step": 185, |
|
"wo_beta": 14.194059371948242 |
|
}, |
|
{ |
|
"dpo_loss": 0.6978750824928284, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 34.00712851830173, |
|
"learning_rate": 9.808328879073251e-07, |
|
"logits": -1.2612725496292114, |
|
"logps": -81.91997528076172, |
|
"loss": 0.1466, |
|
"objective": 0.14948724210262299, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.14948724210262299, |
|
"step": 190, |
|
"wo_beta": 16.620363235473633 |
|
}, |
|
{ |
|
"dpo_loss": 0.6822370290756226, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 31.586658287520144, |
|
"learning_rate": 9.78500156205731e-07, |
|
"logits": -1.2822577953338623, |
|
"logps": -83.0813217163086, |
|
"loss": 0.1319, |
|
"objective": 0.13207347691059113, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.13207347691059113, |
|
"step": 195, |
|
"wo_beta": 14.693647384643555 |
|
}, |
|
{ |
|
"dpo_loss": 0.7044106721878052, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 30.369620708498754, |
|
"learning_rate": 9.760366073392244e-07, |
|
"logits": -1.3258157968521118, |
|
"logps": -83.32820129394531, |
|
"loss": 0.1427, |
|
"objective": 0.15046708285808563, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.15046708285808563, |
|
"step": 200, |
|
"wo_beta": 15.960111618041992 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6959174871444702, |
|
"eval_logits": -1.3123745918273926, |
|
"eval_logps": -88.34333801269531, |
|
"eval_loss": 0.10319730639457703, |
|
"eval_objective": 0.10169863700866699, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5222567319869995, |
|
"eval_regularize": 0.10169863700866699, |
|
"eval_runtime": 532.3008, |
|
"eval_samples_per_second": 10.877, |
|
"eval_steps_per_second": 0.907, |
|
"eval_wo_beta": 15.992826461791992, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.7000283598899841, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 30.076737378719095, |
|
"learning_rate": 9.734429148174674e-07, |
|
"logits": -1.2141386270523071, |
|
"logps": -82.74073028564453, |
|
"loss": 0.1484, |
|
"objective": 0.1470470279455185, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.1470470279455185, |
|
"step": 205, |
|
"wo_beta": 16.118446350097656 |
|
}, |
|
{ |
|
"dpo_loss": 0.6862087249755859, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 31.36222267459615, |
|
"learning_rate": 9.707197877300973e-07, |
|
"logits": -1.2483521699905396, |
|
"logps": -82.3885269165039, |
|
"loss": 0.1454, |
|
"objective": 0.14993111789226532, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.14993111789226532, |
|
"step": 210, |
|
"wo_beta": 15.07961654663086 |
|
}, |
|
{ |
|
"dpo_loss": 0.6946883797645569, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 32.25125352651472, |
|
"learning_rate": 9.678679705528698e-07, |
|
"logits": -1.3168671131134033, |
|
"logps": -82.3456039428711, |
|
"loss": 0.1384, |
|
"objective": 0.14188070595264435, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.14188070595264435, |
|
"step": 215, |
|
"wo_beta": 16.104469299316406 |
|
}, |
|
{ |
|
"dpo_loss": 0.7026723027229309, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 30.142053540661294, |
|
"learning_rate": 9.648882429441256e-07, |
|
"logits": -1.3188337087631226, |
|
"logps": -82.63532257080078, |
|
"loss": 0.1477, |
|
"objective": 0.1607874184846878, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.1607874184846878, |
|
"step": 220, |
|
"wo_beta": 17.079347610473633 |
|
}, |
|
{ |
|
"dpo_loss": 0.6998167634010315, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 29.418648888160003, |
|
"learning_rate": 9.61781419531641e-07, |
|
"logits": -1.3314566612243652, |
|
"logps": -82.72489929199219, |
|
"loss": 0.1465, |
|
"objective": 0.14282181859016418, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.14282181859016418, |
|
"step": 225, |
|
"wo_beta": 15.506386756896973 |
|
}, |
|
{ |
|
"dpo_loss": 0.7007436156272888, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 31.584769522955447, |
|
"learning_rate": 9.585483496899149e-07, |
|
"logits": -1.2612279653549194, |
|
"logps": -82.21707916259766, |
|
"loss": 0.1434, |
|
"objective": 0.14342841506004333, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.14342841506004333, |
|
"step": 230, |
|
"wo_beta": 16.431724548339844 |
|
}, |
|
{ |
|
"dpo_loss": 0.7085835337638855, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 33.138665174716316, |
|
"learning_rate": 9.551899173079606e-07, |
|
"logits": -1.2083913087844849, |
|
"logps": -84.15171813964844, |
|
"loss": 0.1479, |
|
"objective": 0.14772751927375793, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.14772751927375793, |
|
"step": 235, |
|
"wo_beta": 15.722906112670898 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893501877784729, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 28.511782322472136, |
|
"learning_rate": 9.517070405476574e-07, |
|
"logits": -1.3556396961212158, |
|
"logps": -83.491943359375, |
|
"loss": 0.1408, |
|
"objective": 0.1575685441493988, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.1575685441493988, |
|
"step": 240, |
|
"wo_beta": 15.692626953125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901037693023682, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 28.887977273452503, |
|
"learning_rate": 9.481006715927351e-07, |
|
"logits": -1.3499360084533691, |
|
"logps": -82.59223937988281, |
|
"loss": 0.1422, |
|
"objective": 0.1397981345653534, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.1397981345653534, |
|
"step": 245, |
|
"wo_beta": 15.627848625183105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898453831672668, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 30.778123472149638, |
|
"learning_rate": 9.443717963884568e-07, |
|
"logits": -1.1249743700027466, |
|
"logps": -81.38602447509766, |
|
"loss": 0.1451, |
|
"objective": 0.12806275486946106, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.12806275486946106, |
|
"step": 250, |
|
"wo_beta": 14.860217094421387 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.6950441002845764, |
|
"eval_logits": -1.2854480743408203, |
|
"eval_logps": -88.06980895996094, |
|
"eval_loss": 0.11781599372625351, |
|
"eval_objective": 0.11854107677936554, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.11854107677936554, |
|
"eval_runtime": 533.5732, |
|
"eval_samples_per_second": 10.851, |
|
"eval_steps_per_second": 0.905, |
|
"eval_wo_beta": 15.787796020507812, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.6893075704574585, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 27.48861543576658, |
|
"learning_rate": 9.405214343720706e-07, |
|
"logits": -1.3376212120056152, |
|
"logps": -81.39327239990234, |
|
"loss": 0.1325, |
|
"objective": 0.12804514169692993, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.42916667461395264, |
|
"ranking_simple": 0.44583332538604736, |
|
"regularize": 0.12804514169692993, |
|
"step": 255, |
|
"wo_beta": 14.828557968139648 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919839978218079, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 27.470977695013012, |
|
"learning_rate": 9.365506381941065e-07, |
|
"logits": -1.3046835660934448, |
|
"logps": -83.32947540283203, |
|
"loss": 0.1509, |
|
"objective": 0.15500593185424805, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.4583333432674408, |
|
"regularize": 0.15500593185424805, |
|
"step": 260, |
|
"wo_beta": 15.419398307800293 |
|
}, |
|
{ |
|
"dpo_loss": 0.6987964510917664, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 29.786537519342414, |
|
"learning_rate": 9.32460493430591e-07, |
|
"logits": -1.2736799716949463, |
|
"logps": -82.46897888183594, |
|
"loss": 0.1444, |
|
"objective": 0.14515246450901031, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.14515246450901031, |
|
"step": 265, |
|
"wo_beta": 15.908428192138672 |
|
}, |
|
{ |
|
"dpo_loss": 0.6944437026977539, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 28.172549175339846, |
|
"learning_rate": 9.282521182862629e-07, |
|
"logits": -1.397876262664795, |
|
"logps": -82.14982604980469, |
|
"loss": 0.1491, |
|
"objective": 0.15289539098739624, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4541666805744171, |
|
"regularize": 0.15289539098739624, |
|
"step": 270, |
|
"wo_beta": 14.118414878845215 |
|
}, |
|
{ |
|
"dpo_loss": 0.6878421902656555, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 30.974249065309053, |
|
"learning_rate": 9.239266632888658e-07, |
|
"logits": -1.265884280204773, |
|
"logps": -80.5745849609375, |
|
"loss": 0.1429, |
|
"objective": 0.13965575397014618, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.13965575397014618, |
|
"step": 275, |
|
"wo_beta": 15.147540092468262 |
|
}, |
|
{ |
|
"dpo_loss": 0.693124532699585, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 27.26309671203667, |
|
"learning_rate": 9.194853109746072e-07, |
|
"logits": -1.317248821258545, |
|
"logps": -80.71721649169922, |
|
"loss": 0.1422, |
|
"objective": 0.13741357624530792, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.13741357624530792, |
|
"step": 280, |
|
"wo_beta": 15.141572952270508 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898981332778931, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 29.618387771117387, |
|
"learning_rate": 9.14929275564863e-07, |
|
"logits": -1.2990264892578125, |
|
"logps": -81.34524536132812, |
|
"loss": 0.1481, |
|
"objective": 0.14202959835529327, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.14202959835529327, |
|
"step": 285, |
|
"wo_beta": 16.715734481811523 |
|
}, |
|
{ |
|
"dpo_loss": 0.6989319920539856, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 30.35546225687188, |
|
"learning_rate": 9.102598026342222e-07, |
|
"logits": -1.310984492301941, |
|
"logps": -80.47208404541016, |
|
"loss": 0.1416, |
|
"objective": 0.13658234477043152, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.13658234477043152, |
|
"step": 290, |
|
"wo_beta": 15.537436485290527 |
|
}, |
|
{ |
|
"dpo_loss": 0.692668080329895, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 28.386489735858774, |
|
"learning_rate": 9.0547816876996e-07, |
|
"logits": -1.3056447505950928, |
|
"logps": -80.58573913574219, |
|
"loss": 0.1335, |
|
"objective": 0.14200052618980408, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.14200052618980408, |
|
"step": 295, |
|
"wo_beta": 15.984179496765137 |
|
}, |
|
{ |
|
"dpo_loss": 0.6959550380706787, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 30.90903589796416, |
|
"learning_rate": 9.005856812230304e-07, |
|
"logits": -1.2770187854766846, |
|
"logps": -79.3738784790039, |
|
"loss": 0.1305, |
|
"objective": 0.12751255929470062, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.12751255929470062, |
|
"step": 300, |
|
"wo_beta": 14.3499755859375 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6960889101028442, |
|
"eval_logits": -1.2862635850906372, |
|
"eval_logps": -86.33123016357422, |
|
"eval_loss": 0.12468627840280533, |
|
"eval_objective": 0.1251634955406189, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.1251634955406189, |
|
"eval_runtime": 492.3852, |
|
"eval_samples_per_second": 11.759, |
|
"eval_steps_per_second": 0.981, |
|
"eval_wo_beta": 15.766751289367676, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.6915071606636047, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 28.35320542673635, |
|
"learning_rate": 8.955836775506775e-07, |
|
"logits": -1.2531558275222778, |
|
"logps": -80.3687744140625, |
|
"loss": 0.1326, |
|
"objective": 0.1348031610250473, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.1348031610250473, |
|
"step": 305, |
|
"wo_beta": 15.622274398803711 |
|
}, |
|
{ |
|
"dpo_loss": 0.6971884965896606, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 28.116582054859066, |
|
"learning_rate": 8.904735252507609e-07, |
|
"logits": -1.256584882736206, |
|
"logps": -79.94914245605469, |
|
"loss": 0.1365, |
|
"objective": 0.1369226723909378, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.1369226723909378, |
|
"step": 310, |
|
"wo_beta": 14.816594123840332 |
|
}, |
|
{ |
|
"dpo_loss": 0.6855903267860413, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 29.897768012112312, |
|
"learning_rate": 8.852566213878946e-07, |
|
"logits": -1.2702066898345947, |
|
"logps": -79.8655014038086, |
|
"loss": 0.1353, |
|
"objective": 0.13145793974399567, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.13145793974399567, |
|
"step": 315, |
|
"wo_beta": 15.161810874938965 |
|
}, |
|
{ |
|
"dpo_loss": 0.691845178604126, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 28.736143424115674, |
|
"learning_rate": 8.799343922115043e-07, |
|
"logits": -1.2241441011428833, |
|
"logps": -82.17134094238281, |
|
"loss": 0.13, |
|
"objective": 0.1402612328529358, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.1402612328529358, |
|
"step": 320, |
|
"wo_beta": 15.099017143249512 |
|
}, |
|
{ |
|
"dpo_loss": 0.6962689161300659, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 28.356303375759392, |
|
"learning_rate": 8.745082927659046e-07, |
|
"logits": -1.2910945415496826, |
|
"logps": -83.30491638183594, |
|
"loss": 0.1308, |
|
"objective": 0.14350637793540955, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.14350637793540955, |
|
"step": 325, |
|
"wo_beta": 15.133590698242188 |
|
}, |
|
{ |
|
"dpo_loss": 0.6975868344306946, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 29.00689810312343, |
|
"learning_rate": 8.689798064925048e-07, |
|
"logits": -1.1349345445632935, |
|
"logps": -82.04910278320312, |
|
"loss": 0.1321, |
|
"objective": 0.1296585500240326, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.1296585500240326, |
|
"step": 330, |
|
"wo_beta": 16.1423282623291 |
|
}, |
|
{ |
|
"dpo_loss": 0.7005541920661926, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 32.3756572284601, |
|
"learning_rate": 8.633504448242504e-07, |
|
"logits": -1.149806261062622, |
|
"logps": -81.64175415039062, |
|
"loss": 0.1375, |
|
"objective": 0.1390267014503479, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.1390267014503479, |
|
"step": 335, |
|
"wo_beta": 15.652006149291992 |
|
}, |
|
{ |
|
"dpo_loss": 0.6950960755348206, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 27.122604040368284, |
|
"learning_rate": 8.576217467724127e-07, |
|
"logits": -1.2132624387741089, |
|
"logps": -80.64006042480469, |
|
"loss": 0.1292, |
|
"objective": 0.12200692296028137, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.12200692296028137, |
|
"step": 340, |
|
"wo_beta": 15.907822608947754 |
|
}, |
|
{ |
|
"dpo_loss": 0.6975562572479248, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 27.134170349804087, |
|
"learning_rate": 8.517952785058384e-07, |
|
"logits": -1.2632955312728882, |
|
"logps": -80.71128845214844, |
|
"loss": 0.1283, |
|
"objective": 0.11938898265361786, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.11938898265361786, |
|
"step": 345, |
|
"wo_beta": 14.762292861938477 |
|
}, |
|
{ |
|
"dpo_loss": 0.6852299571037292, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 27.658996359022336, |
|
"learning_rate": 8.458726329227747e-07, |
|
"logits": -1.1914026737213135, |
|
"logps": -81.73149108886719, |
|
"loss": 0.1407, |
|
"objective": 0.1554519683122635, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1554519683122635, |
|
"step": 350, |
|
"wo_beta": 15.107101440429688 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6975587606430054, |
|
"eval_logits": -1.2756990194320679, |
|
"eval_logps": -86.45014190673828, |
|
"eval_loss": 0.13138790428638458, |
|
"eval_objective": 0.13096390664577484, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5222567319869995, |
|
"eval_regularize": 0.13096390664577484, |
|
"eval_runtime": 498.5276, |
|
"eval_samples_per_second": 11.614, |
|
"eval_steps_per_second": 0.969, |
|
"eval_wo_beta": 15.656978607177734, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.6982021331787109, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 28.652193663332632, |
|
"learning_rate": 8.398554292153865e-07, |
|
"logits": -1.3350815773010254, |
|
"logps": -79.34367370605469, |
|
"loss": 0.1274, |
|
"objective": 0.1257932186126709, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.1257932186126709, |
|
"step": 355, |
|
"wo_beta": 16.378000259399414 |
|
}, |
|
{ |
|
"dpo_loss": 0.6944258809089661, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 27.709591206743504, |
|
"learning_rate": 8.337453124270862e-07, |
|
"logits": -1.2474267482757568, |
|
"logps": -80.31254577636719, |
|
"loss": 0.1453, |
|
"objective": 0.14443162083625793, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.14443162083625793, |
|
"step": 360, |
|
"wo_beta": 16.190935134887695 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928178071975708, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 31.070681767199403, |
|
"learning_rate": 8.275439530027947e-07, |
|
"logits": -1.276475191116333, |
|
"logps": -80.50602722167969, |
|
"loss": 0.1371, |
|
"objective": 0.13979977369308472, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.13979977369308472, |
|
"step": 365, |
|
"wo_beta": 14.378859519958496 |
|
}, |
|
{ |
|
"dpo_loss": 0.699609637260437, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 30.003574042191506, |
|
"learning_rate": 8.212530463322582e-07, |
|
"logits": -1.2496185302734375, |
|
"logps": -79.11912536621094, |
|
"loss": 0.1306, |
|
"objective": 0.1423943042755127, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.1423943042755127, |
|
"step": 370, |
|
"wo_beta": 15.124627113342285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6967942714691162, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 27.953352635424668, |
|
"learning_rate": 8.148743122865463e-07, |
|
"logits": -1.3011940717697144, |
|
"logps": -80.02760314941406, |
|
"loss": 0.1297, |
|
"objective": 0.11541719734668732, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.11541719734668732, |
|
"step": 375, |
|
"wo_beta": 15.568713188171387 |
|
}, |
|
{ |
|
"dpo_loss": 0.6877638697624207, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 27.32675287386393, |
|
"learning_rate": 8.084094947478554e-07, |
|
"logits": -1.2718795537948608, |
|
"logps": -81.57784271240234, |
|
"loss": 0.1306, |
|
"objective": 0.1403437703847885, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1403437703847885, |
|
"step": 380, |
|
"wo_beta": 15.024064064025879 |
|
}, |
|
{ |
|
"dpo_loss": 0.7029018402099609, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 25.199092121516863, |
|
"learning_rate": 8.018603611327504e-07, |
|
"logits": -1.2051031589508057, |
|
"logps": -80.49242401123047, |
|
"loss": 0.1289, |
|
"objective": 0.12692388892173767, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.12692388892173767, |
|
"step": 385, |
|
"wo_beta": 15.658522605895996 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920034885406494, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 28.52425339340298, |
|
"learning_rate": 7.952287019089685e-07, |
|
"logits": -1.1542584896087646, |
|
"logps": -82.2014389038086, |
|
"loss": 0.128, |
|
"objective": 0.13050222396850586, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.13050222396850586, |
|
"step": 390, |
|
"wo_beta": 15.990551948547363 |
|
}, |
|
{ |
|
"dpo_loss": 0.6983939409255981, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 28.676328293583875, |
|
"learning_rate": 7.88516330105925e-07, |
|
"logits": -1.21431303024292, |
|
"logps": -81.3152847290039, |
|
"loss": 0.1271, |
|
"objective": 0.12024066597223282, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.12024066597223282, |
|
"step": 395, |
|
"wo_beta": 14.856566429138184 |
|
}, |
|
{ |
|
"dpo_loss": 0.7045825719833374, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 26.361954924055155, |
|
"learning_rate": 7.817250808190483e-07, |
|
"logits": -1.2783249616622925, |
|
"logps": -79.67323303222656, |
|
"loss": 0.1245, |
|
"objective": 0.12074790149927139, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.12074790149927139, |
|
"step": 400, |
|
"wo_beta": 15.344539642333984 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.698018491268158, |
|
"eval_logits": -1.2417831420898438, |
|
"eval_logps": -86.2849349975586, |
|
"eval_loss": 0.13988268375396729, |
|
"eval_objective": 0.13904725015163422, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 0.13904725015163422, |
|
"eval_runtime": 544.4211, |
|
"eval_samples_per_second": 10.635, |
|
"eval_steps_per_second": 0.887, |
|
"eval_wo_beta": 15.614696502685547, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.6944829225540161, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 26.829396266860115, |
|
"learning_rate": 7.74856810708083e-07, |
|
"logits": -1.2358256578445435, |
|
"logps": -80.91136169433594, |
|
"loss": 0.1252, |
|
"objective": 0.13733495771884918, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.13733495771884918, |
|
"step": 405, |
|
"wo_beta": 16.799846649169922 |
|
}, |
|
{ |
|
"dpo_loss": 0.6951694488525391, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 25.84880624163644, |
|
"learning_rate": 7.679133974894982e-07, |
|
"logits": -1.2413955926895142, |
|
"logps": -80.84453582763672, |
|
"loss": 0.1146, |
|
"objective": 0.10967493802309036, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.10967493802309036, |
|
"step": 410, |
|
"wo_beta": 16.299657821655273 |
|
}, |
|
{ |
|
"dpo_loss": 0.6816955804824829, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 28.539266676030703, |
|
"learning_rate": 7.608967394231386e-07, |
|
"logits": -1.1460075378417969, |
|
"logps": -80.07962799072266, |
|
"loss": 0.1201, |
|
"objective": 0.11568634957075119, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.11568634957075119, |
|
"step": 415, |
|
"wo_beta": 15.849366188049316 |
|
}, |
|
{ |
|
"dpo_loss": 0.6958954334259033, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 26.83226072322417, |
|
"learning_rate": 7.538087547932584e-07, |
|
"logits": -1.1252403259277344, |
|
"logps": -80.94552612304688, |
|
"loss": 0.1212, |
|
"objective": 0.11827482283115387, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.11827482283115387, |
|
"step": 420, |
|
"wo_beta": 16.14940643310547 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887015700340271, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 26.51780573149761, |
|
"learning_rate": 7.466513813840824e-07, |
|
"logits": -1.1933962106704712, |
|
"logps": -78.89797973632812, |
|
"loss": 0.1135, |
|
"objective": 0.1143736019730568, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.1143736019730568, |
|
"step": 425, |
|
"wo_beta": 14.578470230102539 |
|
}, |
|
{ |
|
"dpo_loss": 0.6991615891456604, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 27.122286588814305, |
|
"learning_rate": 7.394265759500347e-07, |
|
"logits": -1.1930339336395264, |
|
"logps": -80.126220703125, |
|
"loss": 0.1127, |
|
"objective": 0.11676573753356934, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.11676573753356934, |
|
"step": 430, |
|
"wo_beta": 15.9819974899292 |
|
}, |
|
{ |
|
"dpo_loss": 0.6940677762031555, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 26.542064973728884, |
|
"learning_rate": 7.321363136807818e-07, |
|
"logits": -1.1478148698806763, |
|
"logps": -80.018310546875, |
|
"loss": 0.1273, |
|
"objective": 0.12024448066949844, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.12024448066949844, |
|
"step": 435, |
|
"wo_beta": 17.044919967651367 |
|
}, |
|
{ |
|
"dpo_loss": 0.6969379186630249, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 28.251093862423456, |
|
"learning_rate": 7.247825876612352e-07, |
|
"logits": -1.1687721014022827, |
|
"logps": -79.19255828857422, |
|
"loss": 0.1253, |
|
"objective": 0.12027813494205475, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.12027813494205475, |
|
"step": 440, |
|
"wo_beta": 14.654241561889648 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925280094146729, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 26.524957115429544, |
|
"learning_rate": 7.173674083266623e-07, |
|
"logits": -1.1623238325119019, |
|
"logps": -80.57234191894531, |
|
"loss": 0.1123, |
|
"objective": 0.11110406368970871, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.11110406368970871, |
|
"step": 445, |
|
"wo_beta": 16.533472061157227 |
|
}, |
|
{ |
|
"dpo_loss": 0.6959200501441956, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 25.817109114436615, |
|
"learning_rate": 7.098928029130528e-07, |
|
"logits": -1.2953335046768188, |
|
"logps": -80.17058563232422, |
|
"loss": 0.1163, |
|
"objective": 0.11630918085575104, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.11630918085575104, |
|
"step": 450, |
|
"wo_beta": 14.598237991333008 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.6984797716140747, |
|
"eval_logits": -1.2306897640228271, |
|
"eval_logps": -85.48281860351562, |
|
"eval_loss": 0.14205217361450195, |
|
"eval_objective": 0.14207439124584198, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.14207439124584198, |
|
"eval_runtime": 502.3414, |
|
"eval_samples_per_second": 11.526, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wo_beta": 15.61281681060791, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.6977149248123169, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 29.01794849451687, |
|
"learning_rate": 7.023608149028936e-07, |
|
"logits": -1.1321525573730469, |
|
"logps": -79.79704284667969, |
|
"loss": 0.1102, |
|
"objective": 0.10798730701208115, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.10798730701208115, |
|
"step": 455, |
|
"wo_beta": 14.988642692565918 |
|
}, |
|
{ |
|
"dpo_loss": 0.6960881352424622, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 25.82316278857825, |
|
"learning_rate": 6.947735034665001e-07, |
|
"logits": -1.2272473573684692, |
|
"logps": -79.4093246459961, |
|
"loss": 0.1071, |
|
"objective": 0.10132616013288498, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.10132616013288498, |
|
"step": 460, |
|
"wo_beta": 15.888258934020996 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906372308731079, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 30.635018246102483, |
|
"learning_rate": 6.871329428990601e-07, |
|
"logits": -1.2102056741714478, |
|
"logps": -78.2228775024414, |
|
"loss": 0.1131, |
|
"objective": 0.11604170501232147, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.11604170501232147, |
|
"step": 465, |
|
"wo_beta": 14.311129570007324 |
|
}, |
|
{ |
|
"dpo_loss": 0.700882077217102, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 27.46778566417897, |
|
"learning_rate": 6.794412220535425e-07, |
|
"logits": -1.2833130359649658, |
|
"logps": -77.55262756347656, |
|
"loss": 0.108, |
|
"objective": 0.10955775529146194, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.10955775529146194, |
|
"step": 470, |
|
"wo_beta": 14.30273151397705 |
|
}, |
|
{ |
|
"dpo_loss": 0.6947088241577148, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 27.567991845029866, |
|
"learning_rate": 6.717004437696249e-07, |
|
"logits": -1.1878196001052856, |
|
"logps": -79.7737808227539, |
|
"loss": 0.1143, |
|
"objective": 0.10682200640439987, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.10682200640439987, |
|
"step": 475, |
|
"wo_beta": 16.000301361083984 |
|
}, |
|
{ |
|
"dpo_loss": 0.7012575268745422, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 26.96292751307233, |
|
"learning_rate": 6.639127242987987e-07, |
|
"logits": -1.2194726467132568, |
|
"logps": -79.7364730834961, |
|
"loss": 0.1121, |
|
"objective": 0.10879840701818466, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.10879840701818466, |
|
"step": 480, |
|
"wo_beta": 17.723169326782227 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902076005935669, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 25.536217139623062, |
|
"learning_rate": 6.560801927258079e-07, |
|
"logits": -1.2140812873840332, |
|
"logps": -77.77493286132812, |
|
"loss": 0.1063, |
|
"objective": 0.10283537954092026, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.10283537954092026, |
|
"step": 485, |
|
"wo_beta": 16.162378311157227 |
|
}, |
|
{ |
|
"dpo_loss": 0.7003743648529053, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 25.340128312194377, |
|
"learning_rate": 6.482049903865768e-07, |
|
"logits": -1.1755324602127075, |
|
"logps": -80.6698226928711, |
|
"loss": 0.1065, |
|
"objective": 0.11661101877689362, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.11661101877689362, |
|
"step": 490, |
|
"wo_beta": 15.291964530944824 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933376789093018, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 27.42214588210337, |
|
"learning_rate": 6.402892702827916e-07, |
|
"logits": -1.203405499458313, |
|
"logps": -81.71482849121094, |
|
"loss": 0.1083, |
|
"objective": 0.11117922514677048, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.11117922514677048, |
|
"step": 495, |
|
"wo_beta": 15.19780445098877 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919417977333069, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 25.59099749967404, |
|
"learning_rate": 6.323351964932908e-07, |
|
"logits": -1.1464035511016846, |
|
"logps": -80.67649841308594, |
|
"loss": 0.1071, |
|
"objective": 0.10751333087682724, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.10751333087682724, |
|
"step": 500, |
|
"wo_beta": 14.786382675170898 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.6979657411575317, |
|
"eval_logits": -1.2270138263702393, |
|
"eval_logps": -87.26725006103516, |
|
"eval_loss": 0.13817694783210754, |
|
"eval_objective": 0.1376110315322876, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 0.1376110315322876, |
|
"eval_runtime": 507.9058, |
|
"eval_samples_per_second": 11.4, |
|
"eval_steps_per_second": 0.951, |
|
"eval_wo_beta": 15.64445686340332, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891559362411499, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 24.27122577359571, |
|
"learning_rate": 6.243449435824276e-07, |
|
"logits": -1.2177590131759644, |
|
"logps": -81.35147094726562, |
|
"loss": 0.1101, |
|
"objective": 0.1094871535897255, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.1094871535897255, |
|
"step": 505, |
|
"wo_beta": 15.79046630859375 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941244602203369, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 25.930769694740054, |
|
"learning_rate": 6.163206960055652e-07, |
|
"logits": -1.251134991645813, |
|
"logps": -83.10639953613281, |
|
"loss": 0.1006, |
|
"objective": 0.09994279593229294, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.09994279593229294, |
|
"step": 510, |
|
"wo_beta": 14.899516105651855 |
|
}, |
|
{ |
|
"dpo_loss": 0.6874905824661255, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 26.602314880639124, |
|
"learning_rate": 6.082646475118699e-07, |
|
"logits": -1.2633229494094849, |
|
"logps": -84.02688598632812, |
|
"loss": 0.106, |
|
"objective": 0.10199037194252014, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.4541666805744171, |
|
"regularize": 0.10199037194252014, |
|
"step": 515, |
|
"wo_beta": 15.941681861877441 |
|
}, |
|
{ |
|
"dpo_loss": 0.6967552900314331, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 29.852612268822412, |
|
"learning_rate": 6.001790005445606e-07, |
|
"logits": -1.184912919998169, |
|
"logps": -80.95891571044922, |
|
"loss": 0.1071, |
|
"objective": 0.10300089418888092, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.10300089418888092, |
|
"step": 520, |
|
"wo_beta": 15.731270790100098 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896428465843201, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 25.111572790175902, |
|
"learning_rate": 5.920659656387836e-07, |
|
"logits": -1.0910202264785767, |
|
"logps": -79.46784973144531, |
|
"loss": 0.1087, |
|
"objective": 0.10289794951677322, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.10289794951677322, |
|
"step": 525, |
|
"wo_beta": 14.915215492248535 |
|
}, |
|
{ |
|
"dpo_loss": 0.6884135603904724, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 29.155836377588727, |
|
"learning_rate": 5.839277608172738e-07, |
|
"logits": -1.2429722547531128, |
|
"logps": -82.09452056884766, |
|
"loss": 0.1052, |
|
"objective": 0.11081438511610031, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.11081438511610031, |
|
"step": 530, |
|
"wo_beta": 14.850537300109863 |
|
}, |
|
{ |
|
"dpo_loss": 0.6961663961410522, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 25.320925581209725, |
|
"learning_rate": 5.757666109839702e-07, |
|
"logits": -1.2323859930038452, |
|
"logps": -80.30747985839844, |
|
"loss": 0.1001, |
|
"objective": 0.09293892234563828, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.09293892234563828, |
|
"step": 535, |
|
"wo_beta": 15.262944221496582 |
|
}, |
|
{ |
|
"dpo_loss": 0.6923481225967407, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 32.01848958383342, |
|
"learning_rate": 5.675847473157485e-07, |
|
"logits": -1.1209362745285034, |
|
"logps": -80.81604766845703, |
|
"loss": 0.1017, |
|
"objective": 0.1114068478345871, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.1114068478345871, |
|
"step": 540, |
|
"wo_beta": 14.910977363586426 |
|
}, |
|
{ |
|
"dpo_loss": 0.6916370987892151, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 25.57319909143034, |
|
"learning_rate": 5.5938440665244e-07, |
|
"logits": -1.2216829061508179, |
|
"logps": -81.30005645751953, |
|
"loss": 0.1016, |
|
"objective": 0.09744974970817566, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.09744974970817566, |
|
"step": 545, |
|
"wo_beta": 14.310770988464355 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908753514289856, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 25.722462769354692, |
|
"learning_rate": 5.511678308853025e-07, |
|
"logits": -1.2278273105621338, |
|
"logps": -81.18257141113281, |
|
"loss": 0.1045, |
|
"objective": 0.11294317990541458, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.44583332538604736, |
|
"regularize": 0.11294317990541458, |
|
"step": 550, |
|
"wo_beta": 14.909473419189453 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.6977279186248779, |
|
"eval_logits": -1.2327359914779663, |
|
"eval_logps": -87.07755279541016, |
|
"eval_loss": 0.1427639275789261, |
|
"eval_objective": 0.14261718094348907, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 0.14261718094348907, |
|
"eval_runtime": 530.6929, |
|
"eval_samples_per_second": 10.91, |
|
"eval_steps_per_second": 0.91, |
|
"eval_wo_beta": 15.58066177368164, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.6979414820671082, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 25.97117042381748, |
|
"learning_rate": 5.429372663441085e-07, |
|
"logits": -1.0773119926452637, |
|
"logps": -80.85298919677734, |
|
"loss": 0.0969, |
|
"objective": 0.10372842103242874, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.10372842103242874, |
|
"step": 555, |
|
"wo_beta": 14.278889656066895 |
|
}, |
|
{ |
|
"dpo_loss": 0.6877902746200562, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 26.089498554586406, |
|
"learning_rate": 5.34694963183022e-07, |
|
"logits": -1.149969220161438, |
|
"logps": -80.23606872558594, |
|
"loss": 0.0937, |
|
"objective": 0.0943736732006073, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.0943736732006073, |
|
"step": 560, |
|
"wo_beta": 15.772320747375488 |
|
}, |
|
{ |
|
"dpo_loss": 0.6947767734527588, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 26.116747650931945, |
|
"learning_rate": 5.264431747654283e-07, |
|
"logits": -1.1340062618255615, |
|
"logps": -81.63863372802734, |
|
"loss": 0.0947, |
|
"objective": 0.10096503049135208, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.4416666626930237, |
|
"regularize": 0.10096503049135208, |
|
"step": 565, |
|
"wo_beta": 14.981669425964355 |
|
}, |
|
{ |
|
"dpo_loss": 0.692035436630249, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 30.63214010200871, |
|
"learning_rate": 5.181841570478872e-07, |
|
"logits": -1.2694156169891357, |
|
"logps": -81.64689636230469, |
|
"loss": 0.0959, |
|
"objective": 0.1027316302061081, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.1027316302061081, |
|
"step": 570, |
|
"wo_beta": 15.00640869140625 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928724646568298, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 26.684109688489027, |
|
"learning_rate": 5.099201679633768e-07, |
|
"logits": -1.219287633895874, |
|
"logps": -79.6671371459961, |
|
"loss": 0.0902, |
|
"objective": 0.08943381905555725, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.08943381905555725, |
|
"step": 575, |
|
"wo_beta": 15.883743286132812 |
|
}, |
|
{ |
|
"dpo_loss": 0.7014293670654297, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 23.659415037737205, |
|
"learning_rate": 5.016534668039976e-07, |
|
"logits": -1.245025396347046, |
|
"logps": -79.65864562988281, |
|
"loss": 0.0922, |
|
"objective": 0.09364978969097137, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.09364978969097137, |
|
"step": 580, |
|
"wo_beta": 14.111478805541992 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919021606445312, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 26.81667336982406, |
|
"learning_rate": 4.933863136033039e-07, |
|
"logits": -1.1303011178970337, |
|
"logps": -79.01573944091797, |
|
"loss": 0.0912, |
|
"objective": 0.09164983779191971, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.09164983779191971, |
|
"step": 585, |
|
"wo_beta": 15.943554878234863 |
|
}, |
|
{ |
|
"dpo_loss": 0.6958838701248169, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 24.98087975104312, |
|
"learning_rate": 4.851209685184338e-07, |
|
"logits": -1.1811211109161377, |
|
"logps": -78.23771667480469, |
|
"loss": 0.0896, |
|
"objective": 0.08815690129995346, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.08815690129995346, |
|
"step": 590, |
|
"wo_beta": 13.053691864013672 |
|
}, |
|
{ |
|
"dpo_loss": 0.7018415927886963, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 28.222712089048155, |
|
"learning_rate": 4.768596912122045e-07, |
|
"logits": -1.1410056352615356, |
|
"logps": -78.93828582763672, |
|
"loss": 0.0867, |
|
"objective": 0.08855770528316498, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.08855770528316498, |
|
"step": 595, |
|
"wo_beta": 16.56429672241211 |
|
}, |
|
{ |
|
"dpo_loss": 0.6934791803359985, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 26.368906194308657, |
|
"learning_rate": 4.686047402353433e-07, |
|
"logits": -1.1907525062561035, |
|
"logps": -80.13634490966797, |
|
"loss": 0.0866, |
|
"objective": 0.09509587287902832, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.09509587287902832, |
|
"step": 600, |
|
"wo_beta": 15.167766571044922 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6965176463127136, |
|
"eval_logits": -1.2196165323257446, |
|
"eval_logps": -85.19258880615234, |
|
"eval_loss": 0.14236733317375183, |
|
"eval_objective": 0.14079627394676208, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 0.14079627394676208, |
|
"eval_runtime": 531.3996, |
|
"eval_samples_per_second": 10.896, |
|
"eval_steps_per_second": 0.909, |
|
"eval_wo_beta": 15.660321235656738, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.6949159502983093, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 27.65546942935795, |
|
"learning_rate": 4.60358372409022e-07, |
|
"logits": -1.135356068611145, |
|
"logps": -80.58204650878906, |
|
"loss": 0.0851, |
|
"objective": 0.10239014774560928, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.10239014774560928, |
|
"step": 605, |
|
"wo_beta": 17.19474220275879 |
|
}, |
|
{ |
|
"dpo_loss": 0.6975926160812378, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 27.123580050770954, |
|
"learning_rate": 4.521228422078649e-07, |
|
"logits": -1.2206453084945679, |
|
"logps": -78.68167877197266, |
|
"loss": 0.0882, |
|
"objective": 0.0891619473695755, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.0891619473695755, |
|
"step": 610, |
|
"wo_beta": 15.383539199829102 |
|
}, |
|
{ |
|
"dpo_loss": 0.6973095536231995, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 27.24577954816879, |
|
"learning_rate": 4.439004011435979e-07, |
|
"logits": -1.2362395524978638, |
|
"logps": -79.0839614868164, |
|
"loss": 0.0875, |
|
"objective": 0.08598390221595764, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.08598390221595764, |
|
"step": 615, |
|
"wo_beta": 15.571494102478027 |
|
}, |
|
{ |
|
"dpo_loss": 0.6836999654769897, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 26.787162425144906, |
|
"learning_rate": 4.3569329714950703e-07, |
|
"logits": -1.2427488565444946, |
|
"logps": -79.54029846191406, |
|
"loss": 0.0838, |
|
"objective": 0.08879180997610092, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.08879180997610092, |
|
"step": 620, |
|
"wo_beta": 15.718174934387207 |
|
}, |
|
{ |
|
"dpo_loss": 0.6966572999954224, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 27.034118419678652, |
|
"learning_rate": 4.275037739658771e-07, |
|
"logits": -1.1582579612731934, |
|
"logps": -78.85964965820312, |
|
"loss": 0.0817, |
|
"objective": 0.08299548178911209, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.08299548178911209, |
|
"step": 625, |
|
"wo_beta": 14.923952102661133 |
|
}, |
|
{ |
|
"dpo_loss": 0.688913881778717, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 25.951621085094303, |
|
"learning_rate": 4.193340705265745e-07, |
|
"logits": -1.1893038749694824, |
|
"logps": -80.92503356933594, |
|
"loss": 0.0785, |
|
"objective": 0.08198042213916779, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.08198042213916779, |
|
"step": 630, |
|
"wo_beta": 15.90516185760498 |
|
}, |
|
{ |
|
"dpo_loss": 0.6924195885658264, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 26.232506833263244, |
|
"learning_rate": 4.1118642034694565e-07, |
|
"logits": -1.2785860300064087, |
|
"logps": -79.61809539794922, |
|
"loss": 0.0829, |
|
"objective": 0.08000766485929489, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.08000766485929489, |
|
"step": 635, |
|
"wo_beta": 15.796289443969727 |
|
}, |
|
{ |
|
"dpo_loss": 0.6959947943687439, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 27.993983855367574, |
|
"learning_rate": 4.030630509131959e-07, |
|
"logits": -1.2194859981536865, |
|
"logps": -80.71635437011719, |
|
"loss": 0.0842, |
|
"objective": 0.089814692735672, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.089814692735672, |
|
"step": 640, |
|
"wo_beta": 16.932401657104492 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927257776260376, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 28.107934645205802, |
|
"learning_rate": 3.9496618307341713e-07, |
|
"logits": -1.256467342376709, |
|
"logps": -81.03665161132812, |
|
"loss": 0.0853, |
|
"objective": 0.0889531597495079, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.0889531597495079, |
|
"step": 645, |
|
"wo_beta": 15.800675392150879 |
|
}, |
|
{ |
|
"dpo_loss": 0.6969668865203857, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 27.015977070193543, |
|
"learning_rate": 3.8689803043042996e-07, |
|
"logits": -1.2903110980987549, |
|
"logps": -80.92781829833984, |
|
"loss": 0.0847, |
|
"objective": 0.0801667794585228, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.0801667794585228, |
|
"step": 650, |
|
"wo_beta": 15.173321723937988 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.6974130868911743, |
|
"eval_logits": -1.2229208946228027, |
|
"eval_logps": -86.1129150390625, |
|
"eval_loss": 0.1379525512456894, |
|
"eval_objective": 0.13563887774944305, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 0.13563887774944305, |
|
"eval_runtime": 538.8083, |
|
"eval_samples_per_second": 10.746, |
|
"eval_steps_per_second": 0.896, |
|
"eval_wo_beta": 15.666037559509277, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896820068359375, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 26.639855046988597, |
|
"learning_rate": 3.788607987366069e-07, |
|
"logits": -1.1662521362304688, |
|
"logps": -78.9451675415039, |
|
"loss": 0.081, |
|
"objective": 0.08504978567361832, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.08504978567361832, |
|
"step": 655, |
|
"wo_beta": 15.233590126037598 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891672611236572, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 25.995274477757608, |
|
"learning_rate": 3.708566852908418e-07, |
|
"logits": -1.2193191051483154, |
|
"logps": -81.20162200927734, |
|
"loss": 0.0781, |
|
"objective": 0.08211526274681091, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.08211526274681091, |
|
"step": 660, |
|
"wo_beta": 16.372514724731445 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908305287361145, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 28.66146531985666, |
|
"learning_rate": 3.6288787833783016e-07, |
|
"logits": -1.2218626737594604, |
|
"logps": -80.04493713378906, |
|
"loss": 0.0815, |
|
"objective": 0.08463230729103088, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.08463230729103088, |
|
"step": 665, |
|
"wo_beta": 15.034836769104004 |
|
}, |
|
{ |
|
"dpo_loss": 0.6863933801651001, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 30.111613598581105, |
|
"learning_rate": 3.5495655646982503e-07, |
|
"logits": -1.1576950550079346, |
|
"logps": -78.58309173583984, |
|
"loss": 0.0755, |
|
"objective": 0.07363765686750412, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.07363765686750412, |
|
"step": 670, |
|
"wo_beta": 16.09279441833496 |
|
}, |
|
{ |
|
"dpo_loss": 0.695208728313446, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 27.241452477717303, |
|
"learning_rate": 3.470648880310313e-07, |
|
"logits": -1.1648114919662476, |
|
"logps": -79.5347671508789, |
|
"loss": 0.0735, |
|
"objective": 0.07240771502256393, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.07240771502256393, |
|
"step": 675, |
|
"wo_beta": 15.440892219543457 |
|
}, |
|
{ |
|
"dpo_loss": 0.6950518488883972, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 25.198332305215366, |
|
"learning_rate": 3.3921503052480236e-07, |
|
"logits": -1.2177760601043701, |
|
"logps": -81.27088165283203, |
|
"loss": 0.0778, |
|
"objective": 0.07866664230823517, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.07866664230823517, |
|
"step": 680, |
|
"wo_beta": 14.848203659057617 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911803483963013, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 26.10726119743999, |
|
"learning_rate": 3.314091300237999e-07, |
|
"logits": -1.1625895500183105, |
|
"logps": -78.15774536132812, |
|
"loss": 0.0738, |
|
"objective": 0.07330299913883209, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.07330299913883209, |
|
"step": 685, |
|
"wo_beta": 15.467205047607422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6860196590423584, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 26.093926175967837, |
|
"learning_rate": 3.236493205832794e-07, |
|
"logits": -1.21792733669281, |
|
"logps": -79.12659454345703, |
|
"loss": 0.071, |
|
"objective": 0.07433832436800003, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.07433832436800003, |
|
"step": 690, |
|
"wo_beta": 15.627902030944824 |
|
}, |
|
{ |
|
"dpo_loss": 0.6922653317451477, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 26.38661425001647, |
|
"learning_rate": 3.15937723657661e-07, |
|
"logits": -1.1168206930160522, |
|
"logps": -79.83128356933594, |
|
"loss": 0.0723, |
|
"objective": 0.06720028072595596, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.06720028072595596, |
|
"step": 695, |
|
"wo_beta": 16.023746490478516 |
|
}, |
|
{ |
|
"dpo_loss": 0.6856616139411926, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 27.380948849082866, |
|
"learning_rate": 3.082764475205442e-07, |
|
"logits": -1.103851079940796, |
|
"logps": -80.37809753417969, |
|
"loss": 0.071, |
|
"objective": 0.0717112347483635, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.0717112347483635, |
|
"step": 700, |
|
"wo_beta": 14.657614707946777 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.6979688405990601, |
|
"eval_logits": -1.220837116241455, |
|
"eval_logps": -85.24955749511719, |
|
"eval_loss": 0.1420368105173111, |
|
"eval_objective": 0.14046597480773926, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5253623127937317, |
|
"eval_regularize": 0.14046597480773926, |
|
"eval_runtime": 531.1966, |
|
"eval_samples_per_second": 10.9, |
|
"eval_steps_per_second": 0.909, |
|
"eval_wo_beta": 15.610904693603516, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904897093772888, |
|
"epoch": 1.9981105337742089, |
|
"grad_norm": 27.03253447324609, |
|
"learning_rate": 3.006675866883275e-07, |
|
"logits": -1.0365864038467407, |
|
"logps": -79.36177062988281, |
|
"loss": 0.0704, |
|
"objective": 0.07408583164215088, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.07408583164215088, |
|
"step": 705, |
|
"wo_beta": 16.758014678955078 |
|
}, |
|
{ |
|
"dpo_loss": 0.6849521398544312, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 24.956147004394822, |
|
"learning_rate": 2.931132213475884e-07, |
|
"logits": -1.1888701915740967, |
|
"logps": -78.96455383300781, |
|
"loss": 0.0619, |
|
"objective": 0.06422288715839386, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.06422288715839386, |
|
"step": 710, |
|
"wo_beta": 15.760772705078125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6961538791656494, |
|
"epoch": 2.026452527161077, |
|
"grad_norm": 27.279846270487834, |
|
"learning_rate": 2.856154167863814e-07, |
|
"logits": -1.1860238313674927, |
|
"logps": -78.40641021728516, |
|
"loss": 0.0631, |
|
"objective": 0.06441039592027664, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.06441039592027664, |
|
"step": 715, |
|
"wo_beta": 14.784539222717285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898289322853088, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 28.14233189102926, |
|
"learning_rate": 2.7817622282960813e-07, |
|
"logits": -1.1884685754776, |
|
"logps": -79.12120819091797, |
|
"loss": 0.0633, |
|
"objective": 0.06231885775923729, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.06231885775923729, |
|
"step": 720, |
|
"wo_beta": 15.765007972717285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928841471672058, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 25.78908501428665, |
|
"learning_rate": 2.707976732786166e-07, |
|
"logits": -1.1958059072494507, |
|
"logps": -81.6028060913086, |
|
"loss": 0.0578, |
|
"objective": 0.06330116838216782, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.06330116838216782, |
|
"step": 725, |
|
"wo_beta": 13.992663383483887 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905084252357483, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 26.140030636203093, |
|
"learning_rate": 2.6348178535517965e-07, |
|
"logits": -1.2607707977294922, |
|
"logps": -79.21609497070312, |
|
"loss": 0.0598, |
|
"objective": 0.05353347584605217, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.05353347584605217, |
|
"step": 730, |
|
"wo_beta": 15.095206260681152 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901918053627014, |
|
"epoch": 2.0831365139348135, |
|
"grad_norm": 27.302640565922513, |
|
"learning_rate": 2.5623055915000686e-07, |
|
"logits": -1.1885894536972046, |
|
"logps": -78.86723327636719, |
|
"loss": 0.0579, |
|
"objective": 0.05939151346683502, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.05939151346683502, |
|
"step": 735, |
|
"wo_beta": 16.905290603637695 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903797388076782, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 25.718973789328345, |
|
"learning_rate": 2.490459770759398e-07, |
|
"logits": -1.2478386163711548, |
|
"logps": -79.14292907714844, |
|
"loss": 0.0573, |
|
"objective": 0.05540405213832855, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.05540405213832855, |
|
"step": 740, |
|
"wo_beta": 15.3594331741333 |
|
}, |
|
{ |
|
"dpo_loss": 0.6952056288719177, |
|
"epoch": 2.1114785073216815, |
|
"grad_norm": 26.12282917762503, |
|
"learning_rate": 2.419300033259798e-07, |
|
"logits": -1.1640416383743286, |
|
"logps": -79.09960174560547, |
|
"loss": 0.0628, |
|
"objective": 0.0631415918469429, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.0631415918469429, |
|
"step": 745, |
|
"wo_beta": 14.359167098999023 |
|
}, |
|
{ |
|
"dpo_loss": 0.6888077259063721, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 25.53259897003242, |
|
"learning_rate": 2.3488458333629773e-07, |
|
"logits": -1.2182810306549072, |
|
"logps": -78.26011657714844, |
|
"loss": 0.0546, |
|
"objective": 0.05781084671616554, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.05781084671616554, |
|
"step": 750, |
|
"wo_beta": 15.271900177001953 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.6980140209197998, |
|
"eval_logits": -1.2232871055603027, |
|
"eval_logps": -85.46907806396484, |
|
"eval_loss": 0.14231154322624207, |
|
"eval_objective": 0.14071756601333618, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5258799195289612, |
|
"eval_regularize": 0.14071756601333618, |
|
"eval_runtime": 525.9214, |
|
"eval_samples_per_second": 11.009, |
|
"eval_steps_per_second": 0.918, |
|
"eval_wo_beta": 15.648022651672363, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901395320892334, |
|
"epoch": 2.13982050070855, |
|
"grad_norm": 25.775133405076527, |
|
"learning_rate": 2.2791164325437046e-07, |
|
"logits": -1.2039532661437988, |
|
"logps": -80.76856994628906, |
|
"loss": 0.0536, |
|
"objective": 0.054485421627759933, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.054485421627759933, |
|
"step": 755, |
|
"wo_beta": 16.363035202026367 |
|
}, |
|
{ |
|
"dpo_loss": 0.6922858953475952, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 25.744794188993545, |
|
"learning_rate": 2.21013089412392e-07, |
|
"logits": -1.1505485773086548, |
|
"logps": -77.95565795898438, |
|
"loss": 0.0596, |
|
"objective": 0.056366052478551865, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.056366052478551865, |
|
"step": 760, |
|
"wo_beta": 14.503907203674316 |
|
}, |
|
{ |
|
"dpo_loss": 0.6935012936592102, |
|
"epoch": 2.168162494095418, |
|
"grad_norm": 25.81314805277084, |
|
"learning_rate": 2.1419080780610122e-07, |
|
"logits": -1.195157527923584, |
|
"logps": -79.0260009765625, |
|
"loss": 0.0569, |
|
"objective": 0.05813807621598244, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.05813807621598244, |
|
"step": 765, |
|
"wo_beta": 15.846463203430176 |
|
}, |
|
{ |
|
"dpo_loss": 0.6929753422737122, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 26.17366253681256, |
|
"learning_rate": 2.0744666357916925e-07, |
|
"logits": -1.2156563997268677, |
|
"logps": -79.0594253540039, |
|
"loss": 0.0599, |
|
"objective": 0.06166267395019531, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.06166267395019531, |
|
"step": 770, |
|
"wo_beta": 13.665863037109375 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904846429824829, |
|
"epoch": 2.196504487482286, |
|
"grad_norm": 24.80909315966262, |
|
"learning_rate": 2.0078250051328782e-07, |
|
"logits": -1.20059072971344, |
|
"logps": -79.86570739746094, |
|
"loss": 0.0593, |
|
"objective": 0.05707041174173355, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.05707041174173355, |
|
"step": 775, |
|
"wo_beta": 16.53993797302246 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920241117477417, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 26.21741329158667, |
|
"learning_rate": 1.942001405240979e-07, |
|
"logits": -1.1453113555908203, |
|
"logps": -79.6847152709961, |
|
"loss": 0.0544, |
|
"objective": 0.05578133091330528, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.44583332538604736, |
|
"ranking_simple": 0.4375, |
|
"regularize": 0.05578133091330528, |
|
"step": 780, |
|
"wo_beta": 15.170312881469727 |
|
}, |
|
{ |
|
"dpo_loss": 0.6935942769050598, |
|
"epoch": 2.2248464808691546, |
|
"grad_norm": 28.321911906643972, |
|
"learning_rate": 1.877013831630961e-07, |
|
"logits": -1.1368038654327393, |
|
"logps": -79.92477416992188, |
|
"loss": 0.0563, |
|
"objective": 0.0578266978263855, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.0578266978263855, |
|
"step": 785, |
|
"wo_beta": 14.784603118896484 |
|
}, |
|
{ |
|
"dpo_loss": 0.6887209415435791, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 25.1538491328267, |
|
"learning_rate": 1.812880051256551e-07, |
|
"logits": -1.1384888887405396, |
|
"logps": -80.59889221191406, |
|
"loss": 0.0504, |
|
"objective": 0.04905276745557785, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.04905276745557785, |
|
"step": 790, |
|
"wo_beta": 14.593072891235352 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941591501235962, |
|
"epoch": 2.2531884742560226, |
|
"grad_norm": 25.628975208912717, |
|
"learning_rate": 1.7496175976529337e-07, |
|
"logits": -1.1934906244277954, |
|
"logps": -81.73139953613281, |
|
"loss": 0.053, |
|
"objective": 0.05859142541885376, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.05859142541885376, |
|
"step": 795, |
|
"wo_beta": 13.79269790649414 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919682621955872, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 26.293732850411818, |
|
"learning_rate": 1.6872437661432516e-07, |
|
"logits": -1.2084691524505615, |
|
"logps": -80.88973999023438, |
|
"loss": 0.0531, |
|
"objective": 0.05279294773936272, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.05279294773936272, |
|
"step": 800, |
|
"wo_beta": 14.340437889099121 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6981291174888611, |
|
"eval_logits": -1.220612645149231, |
|
"eval_logps": -86.13679504394531, |
|
"eval_loss": 0.138593852519989, |
|
"eval_objective": 0.13714565336704254, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 0.13714565336704254, |
|
"eval_runtime": 503.3749, |
|
"eval_samples_per_second": 11.502, |
|
"eval_steps_per_second": 0.96, |
|
"eval_wo_beta": 15.623366355895996, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.6895002126693726, |
|
"epoch": 2.2815304676428907, |
|
"grad_norm": 26.85852458075238, |
|
"learning_rate": 1.62577560911024e-07, |
|
"logits": -1.1975409984588623, |
|
"logps": -79.75126647949219, |
|
"loss": 0.0473, |
|
"objective": 0.047933317720890045, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.047933317720890045, |
|
"step": 805, |
|
"wo_beta": 15.455560684204102 |
|
}, |
|
{ |
|
"dpo_loss": 0.693041980266571, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 27.109828632522476, |
|
"learning_rate": 1.565229931334277e-07, |
|
"logits": -1.2860682010650635, |
|
"logps": -79.39039611816406, |
|
"loss": 0.051, |
|
"objective": 0.04613161459565163, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.04613161459565163, |
|
"step": 810, |
|
"wo_beta": 13.837719917297363 |
|
}, |
|
{ |
|
"dpo_loss": 0.6961421966552734, |
|
"epoch": 2.309872461029759, |
|
"grad_norm": 26.77232369418631, |
|
"learning_rate": 1.5056232853991208e-07, |
|
"logits": -1.2426903247833252, |
|
"logps": -80.33802032470703, |
|
"loss": 0.0483, |
|
"objective": 0.04774492606520653, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 0.04774492606520653, |
|
"step": 815, |
|
"wo_beta": 15.377904891967773 |
|
}, |
|
{ |
|
"dpo_loss": 0.6943568587303162, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 25.84415791966093, |
|
"learning_rate": 1.4469719671666043e-07, |
|
"logits": -1.1784952878952026, |
|
"logps": -79.52135467529297, |
|
"loss": 0.0497, |
|
"objective": 0.0464615561068058, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.0464615561068058, |
|
"step": 820, |
|
"wo_beta": 14.641592979431152 |
|
}, |
|
{ |
|
"dpo_loss": 0.6941722631454468, |
|
"epoch": 2.3382144544166272, |
|
"grad_norm": 26.057445300358456, |
|
"learning_rate": 1.389292011321498e-07, |
|
"logits": -1.1956678628921509, |
|
"logps": -78.97592163085938, |
|
"loss": 0.0489, |
|
"objective": 0.04843177646398544, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.04843177646398544, |
|
"step": 825, |
|
"wo_beta": 15.882107734680176 |
|
}, |
|
{ |
|
"dpo_loss": 0.6919335722923279, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 25.587425832586177, |
|
"learning_rate": 1.3325991869878012e-07, |
|
"logits": -1.1966559886932373, |
|
"logps": -81.00519561767578, |
|
"loss": 0.0487, |
|
"objective": 0.05618049576878548, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.05618049576878548, |
|
"step": 830, |
|
"wo_beta": 15.746501922607422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917215585708618, |
|
"epoch": 2.3665564478034957, |
|
"grad_norm": 25.756644403885232, |
|
"learning_rate": 1.2769089934176126e-07, |
|
"logits": -1.168601632118225, |
|
"logps": -80.84972381591797, |
|
"loss": 0.0488, |
|
"objective": 0.052498627454042435, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.052498627454042435, |
|
"step": 835, |
|
"wo_beta": 14.608040809631348 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898554563522339, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 25.072094771225707, |
|
"learning_rate": 1.222236655753791e-07, |
|
"logits": -1.1249865293502808, |
|
"logps": -80.45842742919922, |
|
"loss": 0.0434, |
|
"objective": 0.04277409613132477, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4625000059604645, |
|
"regularize": 0.04277409613132477, |
|
"step": 840, |
|
"wo_beta": 16.011308670043945 |
|
}, |
|
{ |
|
"dpo_loss": 0.6897058486938477, |
|
"epoch": 2.3948984411903638, |
|
"grad_norm": 31.2138593781791, |
|
"learning_rate": 1.1685971208675538e-07, |
|
"logits": -1.1826022863388062, |
|
"logps": -81.36385345458984, |
|
"loss": 0.0438, |
|
"objective": 0.04376084357500076, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.04376084357500076, |
|
"step": 845, |
|
"wo_beta": 15.694497108459473 |
|
}, |
|
{ |
|
"dpo_loss": 0.689830482006073, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 26.424193566129606, |
|
"learning_rate": 1.1160050532721527e-07, |
|
"logits": -1.2078933715820312, |
|
"logps": -79.71755981445312, |
|
"loss": 0.0444, |
|
"objective": 0.04779530316591263, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.04779530316591263, |
|
"step": 850, |
|
"wo_beta": 15.619561195373535 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.6980399489402771, |
|
"eval_logits": -1.2270959615707397, |
|
"eval_logps": -86.03622436523438, |
|
"eval_loss": 0.13948112726211548, |
|
"eval_objective": 0.1381867229938507, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.1381867229938507, |
|
"eval_runtime": 508.2715, |
|
"eval_samples_per_second": 11.392, |
|
"eval_steps_per_second": 0.95, |
|
"eval_wo_beta": 15.647224426269531, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.690664529800415, |
|
"epoch": 2.423240434577232, |
|
"grad_norm": 26.086004792829357, |
|
"learning_rate": 1.0644748311137375e-07, |
|
"logits": -1.2208842039108276, |
|
"logps": -79.23947143554688, |
|
"loss": 0.0431, |
|
"objective": 0.044093988835811615, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.044093988835811615, |
|
"step": 855, |
|
"wo_beta": 14.724575996398926 |
|
}, |
|
{ |
|
"dpo_loss": 0.6878847479820251, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 24.819758120044014, |
|
"learning_rate": 1.0140205422405212e-07, |
|
"logits": -1.172597050666809, |
|
"logps": -80.47863006591797, |
|
"loss": 0.0425, |
|
"objective": 0.044025711715221405, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.044025711715221405, |
|
"step": 860, |
|
"wo_beta": 15.323599815368652 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902381777763367, |
|
"epoch": 2.4515824279641003, |
|
"grad_norm": 27.313034441936136, |
|
"learning_rate": 9.646559803512993e-08, |
|
"logits": -1.2031606435775757, |
|
"logps": -79.59320831298828, |
|
"loss": 0.0444, |
|
"objective": 0.04272008314728737, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.04272008314728737, |
|
"step": 865, |
|
"wo_beta": 15.875487327575684 |
|
}, |
|
{ |
|
"dpo_loss": 0.6910372376441956, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 25.76666127477957, |
|
"learning_rate": 9.163946412243895e-08, |
|
"logits": -1.2454520463943481, |
|
"logps": -80.33094024658203, |
|
"loss": 0.0442, |
|
"objective": 0.04635915905237198, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.04635915905237198, |
|
"step": 870, |
|
"wo_beta": 14.93254566192627 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905195713043213, |
|
"epoch": 2.4799244213509684, |
|
"grad_norm": 25.65493367025704, |
|
"learning_rate": 8.692497190280224e-08, |
|
"logits": -1.193867802619934, |
|
"logps": -79.73404693603516, |
|
"loss": 0.044, |
|
"objective": 0.04675581306219101, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.04675581306219101, |
|
"step": 875, |
|
"wo_beta": 16.489763259887695 |
|
}, |
|
{ |
|
"dpo_loss": 0.6905779242515564, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 26.621663140091542, |
|
"learning_rate": 8.232341027131883e-08, |
|
"logits": -1.1066038608551025, |
|
"logps": -79.80467224121094, |
|
"loss": 0.0446, |
|
"objective": 0.046583421528339386, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.046583421528339386, |
|
"step": 880, |
|
"wo_beta": 17.46852684020996 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917292475700378, |
|
"epoch": 2.5082664147378364, |
|
"grad_norm": 24.02209120686893, |
|
"learning_rate": 7.783603724899257e-08, |
|
"logits": -1.25592041015625, |
|
"logps": -79.1759262084961, |
|
"loss": 0.0422, |
|
"objective": 0.04294423386454582, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.04294423386454582, |
|
"step": 885, |
|
"wo_beta": 16.415306091308594 |
|
}, |
|
{ |
|
"dpo_loss": 0.6880825161933899, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 26.181840029139675, |
|
"learning_rate": 7.346407963880136e-08, |
|
"logits": -1.1791417598724365, |
|
"logps": -78.21730041503906, |
|
"loss": 0.0424, |
|
"objective": 0.03773224726319313, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.03773224726319313, |
|
"step": 890, |
|
"wo_beta": 13.494309425354004 |
|
}, |
|
{ |
|
"dpo_loss": 0.692958414554596, |
|
"epoch": 2.536608408124705, |
|
"grad_norm": 27.615133075738825, |
|
"learning_rate": 6.92087326903022e-08, |
|
"logits": -1.175589680671692, |
|
"logps": -80.6869888305664, |
|
"loss": 0.0444, |
|
"objective": 0.0476791188120842, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.0476791188120842, |
|
"step": 895, |
|
"wo_beta": 16.41474151611328 |
|
}, |
|
{ |
|
"dpo_loss": 0.6935379505157471, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 25.263999580012257, |
|
"learning_rate": 6.507115977286143e-08, |
|
"logits": -1.1382538080215454, |
|
"logps": -79.20881652832031, |
|
"loss": 0.0438, |
|
"objective": 0.044265471398830414, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.044265471398830414, |
|
"step": 900, |
|
"wo_beta": 15.096195220947266 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.6975382566452026, |
|
"eval_logits": -1.2295913696289062, |
|
"eval_logps": -85.88396453857422, |
|
"eval_loss": 0.13868437707424164, |
|
"eval_objective": 0.13740767538547516, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.13740767538547516, |
|
"eval_runtime": 525.8368, |
|
"eval_samples_per_second": 11.011, |
|
"eval_steps_per_second": 0.919, |
|
"eval_wo_beta": 15.634546279907227, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917089819908142, |
|
"epoch": 2.564950401511573, |
|
"grad_norm": 25.44195334625603, |
|
"learning_rate": 6.105249205760127e-08, |
|
"logits": -1.2037063837051392, |
|
"logps": -79.04875183105469, |
|
"loss": 0.0411, |
|
"objective": 0.03601410239934921, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.03601410239934921, |
|
"step": 905, |
|
"wo_beta": 14.861380577087402 |
|
}, |
|
{ |
|
"dpo_loss": 0.6932801008224487, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 26.495925146665332, |
|
"learning_rate": 5.7153828208148846e-08, |
|
"logits": -1.1827551126480103, |
|
"logps": -81.922607421875, |
|
"loss": 0.0424, |
|
"objective": 0.04883956164121628, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.04883956164121628, |
|
"step": 910, |
|
"wo_beta": 15.852696418762207 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898232102394104, |
|
"epoch": 2.593292394898441, |
|
"grad_norm": 25.88822340642525, |
|
"learning_rate": 5.337623408027292e-08, |
|
"logits": -1.2935634851455688, |
|
"logps": -80.87789916992188, |
|
"loss": 0.0403, |
|
"objective": 0.040093984454870224, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.040093984454870224, |
|
"step": 915, |
|
"wo_beta": 14.905534744262695 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920287013053894, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 25.364010577767672, |
|
"learning_rate": 4.972074243048896e-08, |
|
"logits": -1.1468993425369263, |
|
"logps": -79.89569854736328, |
|
"loss": 0.0396, |
|
"objective": 0.03967604413628578, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.03967604413628578, |
|
"step": 920, |
|
"wo_beta": 15.246692657470703 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928901076316833, |
|
"epoch": 2.6216343882853095, |
|
"grad_norm": 27.967184575096596, |
|
"learning_rate": 4.6188352633713956e-08, |
|
"logits": -1.1743673086166382, |
|
"logps": -80.17101287841797, |
|
"loss": 0.0417, |
|
"objective": 0.04370425269007683, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.04370425269007683, |
|
"step": 925, |
|
"wo_beta": 16.336292266845703 |
|
}, |
|
{ |
|
"dpo_loss": 0.688522458076477, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 26.578359144982873, |
|
"learning_rate": 4.2780030410047796e-08, |
|
"logits": -1.1617387533187866, |
|
"logps": -79.97476196289062, |
|
"loss": 0.0365, |
|
"objective": 0.03662450239062309, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44583332538604736, |
|
"ranking_simple": 0.44583332538604736, |
|
"regularize": 0.03662450239062309, |
|
"step": 930, |
|
"wo_beta": 16.801166534423828 |
|
}, |
|
{ |
|
"dpo_loss": 0.6928302645683289, |
|
"epoch": 2.6499763816721775, |
|
"grad_norm": 26.6756558913633, |
|
"learning_rate": 3.949670756075446e-08, |
|
"logits": -1.1548212766647339, |
|
"logps": -78.78431701660156, |
|
"loss": 0.0364, |
|
"objective": 0.0356716513633728, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.0356716513633728, |
|
"step": 935, |
|
"wo_beta": 15.733369827270508 |
|
}, |
|
{ |
|
"dpo_loss": 0.6884638071060181, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 26.11837122854028, |
|
"learning_rate": 3.63392817135173e-08, |
|
"logits": -1.213140845298767, |
|
"logps": -81.39899444580078, |
|
"loss": 0.0357, |
|
"objective": 0.03838236257433891, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.03838236257433891, |
|
"step": 940, |
|
"wo_beta": 16.71453094482422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904810070991516, |
|
"epoch": 2.678318375059046, |
|
"grad_norm": 26.48243005501328, |
|
"learning_rate": 3.330861607703611e-08, |
|
"logits": -1.2477443218231201, |
|
"logps": -80.07948303222656, |
|
"loss": 0.0369, |
|
"objective": 0.03517834097146988, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.03517834097146988, |
|
"step": 945, |
|
"wo_beta": 15.665254592895508 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894643902778625, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 26.269248260275482, |
|
"learning_rate": 3.040553920503502e-08, |
|
"logits": -1.1376032829284668, |
|
"logps": -80.89375305175781, |
|
"loss": 0.0384, |
|
"objective": 0.03873926401138306, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.03873926401138306, |
|
"step": 950, |
|
"wo_beta": 14.65186882019043 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.6974536180496216, |
|
"eval_logits": -1.2285144329071045, |
|
"eval_logps": -85.95899963378906, |
|
"eval_loss": 0.13796193897724152, |
|
"eval_objective": 0.13680347800254822, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.523809552192688, |
|
"eval_regularize": 0.13680347800254822, |
|
"eval_runtime": 502.396, |
|
"eval_samples_per_second": 11.525, |
|
"eval_steps_per_second": 0.961, |
|
"eval_wo_beta": 15.642508506774902, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.6890572905540466, |
|
"epoch": 2.706660368445914, |
|
"grad_norm": 24.74397275822761, |
|
"learning_rate": 2.7630844769743756e-08, |
|
"logits": -1.2225416898727417, |
|
"logps": -79.87822723388672, |
|
"loss": 0.0403, |
|
"objective": 0.04285174608230591, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4749999940395355, |
|
"regularize": 0.04285174608230591, |
|
"step": 955, |
|
"wo_beta": 13.80820369720459 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908868551254272, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 25.907101929875015, |
|
"learning_rate": 2.4985291344915673e-08, |
|
"logits": -1.1964094638824463, |
|
"logps": -79.958740234375, |
|
"loss": 0.0384, |
|
"objective": 0.03498096391558647, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.03498096391558647, |
|
"step": 960, |
|
"wo_beta": 16.096843719482422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6898122429847717, |
|
"epoch": 2.735002361832782, |
|
"grad_norm": 26.015895295989438, |
|
"learning_rate": 2.2469602198441573e-08, |
|
"logits": -1.2220391035079956, |
|
"logps": -80.10702514648438, |
|
"loss": 0.0368, |
|
"objective": 0.03775167092680931, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.03775167092680931, |
|
"step": 965, |
|
"wo_beta": 14.61376953125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917709112167358, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 24.33103792831753, |
|
"learning_rate": 2.008446509461498e-08, |
|
"logits": -1.2293510437011719, |
|
"logps": -81.0619888305664, |
|
"loss": 0.0341, |
|
"objective": 0.03296136483550072, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.03296136483550072, |
|
"step": 970, |
|
"wo_beta": 14.957200050354004 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909447908401489, |
|
"epoch": 2.7633443552196506, |
|
"grad_norm": 24.892680282575437, |
|
"learning_rate": 1.7830532106104746e-08, |
|
"logits": -1.1391520500183105, |
|
"logps": -79.50247955322266, |
|
"loss": 0.0358, |
|
"objective": 0.03571467101573944, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.03571467101573944, |
|
"step": 975, |
|
"wo_beta": 15.747049331665039 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906387209892273, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 25.891776024282194, |
|
"learning_rate": 1.570841943568446e-08, |
|
"logits": -1.2599250078201294, |
|
"logps": -78.82478332519531, |
|
"loss": 0.0365, |
|
"objective": 0.03682435303926468, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.03682435303926468, |
|
"step": 980, |
|
"wo_beta": 14.397340774536133 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933729648590088, |
|
"epoch": 2.7916863486065187, |
|
"grad_norm": 24.71596998222205, |
|
"learning_rate": 1.3718707247769134e-08, |
|
"logits": -1.1248877048492432, |
|
"logps": -77.72516632080078, |
|
"loss": 0.038, |
|
"objective": 0.03822270780801773, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.03822270780801773, |
|
"step": 985, |
|
"wo_beta": 14.327728271484375 |
|
}, |
|
{ |
|
"dpo_loss": 0.691889762878418, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 26.185929406261582, |
|
"learning_rate": 1.1861939509803686e-08, |
|
"logits": -1.1771855354309082, |
|
"logps": -81.14643859863281, |
|
"loss": 0.0369, |
|
"objective": 0.036898624151945114, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.47083333134651184, |
|
"regularize": 0.036898624151945114, |
|
"step": 990, |
|
"wo_beta": 15.375889778137207 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891864538192749, |
|
"epoch": 2.820028341993387, |
|
"grad_norm": 24.803225677825235, |
|
"learning_rate": 1.0138623843548078e-08, |
|
"logits": -1.2396986484527588, |
|
"logps": -79.1412353515625, |
|
"loss": 0.0365, |
|
"objective": 0.04024568572640419, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.04024568572640419, |
|
"step": 995, |
|
"wo_beta": 16.440141677856445 |
|
}, |
|
{ |
|
"dpo_loss": 0.6907335519790649, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 24.80804716491088, |
|
"learning_rate": 8.54923138629815e-09, |
|
"logits": -1.1814649105072021, |
|
"logps": -78.3318862915039, |
|
"loss": 0.0375, |
|
"objective": 0.03398551046848297, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.03398551046848297, |
|
"step": 1000, |
|
"wo_beta": 14.515811920166016 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6973779201507568, |
|
"eval_logits": -1.2304595708847046, |
|
"eval_logps": -85.99760437011719, |
|
"eval_loss": 0.1379886120557785, |
|
"eval_objective": 0.1368565410375595, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 0.1368565410375595, |
|
"eval_runtime": 504.9482, |
|
"eval_samples_per_second": 11.467, |
|
"eval_steps_per_second": 0.957, |
|
"eval_wo_beta": 15.63548755645752, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911761164665222, |
|
"epoch": 2.848370335380255, |
|
"grad_norm": 27.32667601221845, |
|
"learning_rate": 7.09419666208183e-09, |
|
"logits": -1.1803662776947021, |
|
"logps": -78.7650375366211, |
|
"loss": 0.036, |
|
"objective": 0.03725501522421837, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.03725501522421837, |
|
"step": 1005, |
|
"wo_beta": 15.2937593460083 |
|
}, |
|
{ |
|
"dpo_loss": 0.6888595223426819, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 26.14400831689978, |
|
"learning_rate": 5.773917462864264e-09, |
|
"logits": -1.2407745122909546, |
|
"logps": -79.07453918457031, |
|
"loss": 0.0359, |
|
"objective": 0.03689141198992729, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.03689141198992729, |
|
"step": 1010, |
|
"wo_beta": 15.180621147155762 |
|
}, |
|
{ |
|
"dpo_loss": 0.6912004947662354, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 24.9602315307722, |
|
"learning_rate": 4.588754739795586e-09, |
|
"logits": -1.1721571683883667, |
|
"logps": -78.31599426269531, |
|
"loss": 0.0354, |
|
"objective": 0.03823023661971092, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.03823023661971092, |
|
"step": 1015, |
|
"wo_beta": 14.313817977905273 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896302700042725, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 24.85258883289883, |
|
"learning_rate": 3.53903250453047e-09, |
|
"logits": -1.1410295963287354, |
|
"logps": -80.05741882324219, |
|
"loss": 0.0343, |
|
"objective": 0.03470051661133766, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.03470051661133766, |
|
"step": 1020, |
|
"wo_beta": 17.722339630126953 |
|
}, |
|
{ |
|
"dpo_loss": 0.6912213563919067, |
|
"epoch": 2.9050543221539913, |
|
"grad_norm": 25.437671735836517, |
|
"learning_rate": 2.6250377406467627e-09, |
|
"logits": -1.2291027307510376, |
|
"logps": -80.00859832763672, |
|
"loss": 0.0379, |
|
"objective": 0.037315838038921356, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.037315838038921356, |
|
"step": 1025, |
|
"wo_beta": 14.656061172485352 |
|
}, |
|
{ |
|
"dpo_loss": 0.6911433935165405, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 24.681518212372314, |
|
"learning_rate": 1.8470203251865768e-09, |
|
"logits": -1.2523103952407837, |
|
"logps": -80.20305633544922, |
|
"loss": 0.035, |
|
"objective": 0.03597547858953476, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.03597547858953476, |
|
"step": 1030, |
|
"wo_beta": 16.243247985839844 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904833316802979, |
|
"epoch": 2.9333963155408598, |
|
"grad_norm": 26.808499612926756, |
|
"learning_rate": 1.2051929603428823e-09, |
|
"logits": -1.2276477813720703, |
|
"logps": -80.6124496459961, |
|
"loss": 0.0344, |
|
"objective": 0.03077917918562889, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.03077917918562889, |
|
"step": 1035, |
|
"wo_beta": 14.297567367553711 |
|
}, |
|
{ |
|
"dpo_loss": 0.6902684569358826, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 24.71043561481991, |
|
"learning_rate": 6.997311153086882e-10, |
|
"logits": -1.227773904800415, |
|
"logps": -80.38175201416016, |
|
"loss": 0.0364, |
|
"objective": 0.036134228110313416, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.036134228110313416, |
|
"step": 1040, |
|
"wo_beta": 16.110403060913086 |
|
}, |
|
{ |
|
"dpo_loss": 0.6894943118095398, |
|
"epoch": 2.961738308927728, |
|
"grad_norm": 26.305013618654215, |
|
"learning_rate": 3.3077297830541585e-10, |
|
"logits": -1.1821495294570923, |
|
"logps": -81.93363189697266, |
|
"loss": 0.0371, |
|
"objective": 0.04041092470288277, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.04041092470288277, |
|
"step": 1045, |
|
"wo_beta": 17.30424690246582 |
|
}, |
|
{ |
|
"dpo_loss": 0.6926708221435547, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 27.660126015515125, |
|
"learning_rate": 9.841941880361914e-11, |
|
"logits": -1.2283350229263306, |
|
"logps": -78.42631530761719, |
|
"loss": 0.0397, |
|
"objective": 0.03637199103832245, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.03637199103832245, |
|
"step": 1050, |
|
"wo_beta": 14.132574081420898 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.697369396686554, |
|
"eval_logits": -1.230570673942566, |
|
"eval_logps": -85.98023223876953, |
|
"eval_loss": 0.13814175128936768, |
|
"eval_objective": 0.13700547814369202, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5243270993232727, |
|
"eval_regularize": 0.13700547814369202, |
|
"eval_runtime": 530.5394, |
|
"eval_samples_per_second": 10.913, |
|
"eval_steps_per_second": 0.91, |
|
"eval_wo_beta": 15.63470458984375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.689972996711731, |
|
"epoch": 2.9900803023145963, |
|
"grad_norm": 25.71242634224602, |
|
"learning_rate": 2.7339599464326622e-12, |
|
"logits": -1.2016465663909912, |
|
"logps": -79.08844757080078, |
|
"loss": 0.0389, |
|
"objective": 0.03705615550279617, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.03705615550279617, |
|
"step": 1055, |
|
"wo_beta": 14.549761772155762 |
|
}, |
|
{ |
|
"epoch": 2.992914501653283, |
|
"step": 1056, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08480868444806247, |
|
"train_runtime": 47353.1169, |
|
"train_samples_per_second": 3.218, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|