qwen2.5-0.5b-expo-L1EXPO-noES-0.1 / trainer_state.json
hZzy's picture
Model save
fc395d0 verified
raw
history blame
129 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.992914501653283,
"eval_steps": 50,
"global_step": 1056,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 36.794102305076855,
"learning_rate": 9.433962264150943e-09,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 0.0051,
"objective": 0.0046141319908201694,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.0046141319908201694,
"step": 1,
"wo_beta": 14.841486930847168
},
{
"dpo_loss": 0.6930367350578308,
"epoch": 0.014170996693434105,
"grad_norm": 51.56528279298989,
"learning_rate": 4.7169811320754715e-08,
"logits": -1.4291929006576538,
"logps": -83.85256958007812,
"loss": 0.0058,
"objective": 0.005918528418987989,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4895833432674408,
"ranking_simple": 0.4895833432674408,
"regularize": 0.005918528418987989,
"step": 5,
"wo_beta": 16.667278289794922
},
{
"dpo_loss": 0.6930564641952515,
"epoch": 0.02834199338686821,
"grad_norm": 43.62540826850091,
"learning_rate": 9.433962264150943e-08,
"logits": -1.4014313220977783,
"logps": -84.90540313720703,
"loss": 0.0065,
"objective": 0.00607979716733098,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5791666507720947,
"regularize": 0.00607979716733098,
"step": 10,
"wo_beta": 15.295255661010742
},
{
"dpo_loss": 0.691772997379303,
"epoch": 0.042512990080302314,
"grad_norm": 40.579476886356176,
"learning_rate": 1.4150943396226414e-07,
"logits": -1.5395350456237793,
"logps": -84.67674255371094,
"loss": 0.0077,
"objective": 0.007744006346911192,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.007744006346911192,
"step": 15,
"wo_beta": 15.72358512878418
},
{
"dpo_loss": 0.6908682584762573,
"epoch": 0.05668398677373642,
"grad_norm": 38.45055261776428,
"learning_rate": 1.8867924528301886e-07,
"logits": -1.3619084358215332,
"logps": -83.87267303466797,
"loss": 0.0106,
"objective": 0.011018705554306507,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.011018705554306507,
"step": 20,
"wo_beta": 16.501863479614258
},
{
"dpo_loss": 0.6917246580123901,
"epoch": 0.07085498346717052,
"grad_norm": 37.49075261903623,
"learning_rate": 2.3584905660377358e-07,
"logits": -1.366659164428711,
"logps": -84.04557037353516,
"loss": 0.0144,
"objective": 0.012653553858399391,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5625,
"regularize": 0.012653553858399391,
"step": 25,
"wo_beta": 15.649717330932617
},
{
"dpo_loss": 0.6906312704086304,
"epoch": 0.08502598016060463,
"grad_norm": 35.42831042318107,
"learning_rate": 2.830188679245283e-07,
"logits": -1.4202715158462524,
"logps": -84.00289154052734,
"loss": 0.0156,
"objective": 0.015595527365803719,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.015595527365803719,
"step": 30,
"wo_beta": 16.955543518066406
},
{
"dpo_loss": 0.6931964755058289,
"epoch": 0.09919697685403873,
"grad_norm": 37.327321600930496,
"learning_rate": 3.30188679245283e-07,
"logits": -1.3935037851333618,
"logps": -83.39187622070312,
"loss": 0.0202,
"objective": 0.021191226318478584,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.021191226318478584,
"step": 35,
"wo_beta": 16.169347763061523
},
{
"dpo_loss": 0.693729817867279,
"epoch": 0.11336797354747284,
"grad_norm": 41.6880498675233,
"learning_rate": 3.773584905660377e-07,
"logits": -1.381697177886963,
"logps": -83.91118621826172,
"loss": 0.0228,
"objective": 0.02042653225362301,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.02042653225362301,
"step": 40,
"wo_beta": 14.309080123901367
},
{
"dpo_loss": 0.6919765472412109,
"epoch": 0.12753897024090693,
"grad_norm": 41.11048762433909,
"learning_rate": 4.2452830188679244e-07,
"logits": -1.3955552577972412,
"logps": -84.25520324707031,
"loss": 0.027,
"objective": 0.025382202118635178,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 0.025382202118635178,
"step": 45,
"wo_beta": 14.21595287322998
},
{
"dpo_loss": 0.6911224722862244,
"epoch": 0.14170996693434104,
"grad_norm": 41.07625280062658,
"learning_rate": 4.7169811320754717e-07,
"logits": -1.4127604961395264,
"logps": -85.3918685913086,
"loss": 0.0351,
"objective": 0.03202561289072037,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03202561289072037,
"step": 50,
"wo_beta": 15.589811325073242
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6926834583282471,
"eval_logits": -1.391736626625061,
"eval_logps": -91.23294067382812,
"eval_loss": 0.02213538996875286,
"eval_objective": 0.022384027019143105,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5212215185165405,
"eval_regularize": 0.022384027019143105,
"eval_runtime": 470.1076,
"eval_samples_per_second": 12.316,
"eval_steps_per_second": 1.027,
"eval_wo_beta": 16.221710205078125,
"step": 50
},
{
"dpo_loss": 0.6922997832298279,
"epoch": 0.15588096362777515,
"grad_norm": 36.466581476765526,
"learning_rate": 5.188679245283019e-07,
"logits": -1.3620656728744507,
"logps": -84.91451263427734,
"loss": 0.0367,
"objective": 0.0405682697892189,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4833333194255829,
"regularize": 0.0405682697892189,
"step": 55,
"wo_beta": 15.095004081726074
},
{
"dpo_loss": 0.6875351071357727,
"epoch": 0.17005196032120926,
"grad_norm": 36.25782748515131,
"learning_rate": 5.660377358490566e-07,
"logits": -1.28928804397583,
"logps": -85.71366119384766,
"loss": 0.0403,
"objective": 0.04035286232829094,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.04035286232829094,
"step": 60,
"wo_beta": 14.607115745544434
},
{
"dpo_loss": 0.6947705149650574,
"epoch": 0.18422295701464336,
"grad_norm": 41.25867915272223,
"learning_rate": 6.132075471698112e-07,
"logits": -1.3798266649246216,
"logps": -83.1692123413086,
"loss": 0.0491,
"objective": 0.050007414072752,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.050007414072752,
"step": 65,
"wo_beta": 14.976885795593262
},
{
"dpo_loss": 0.6880966424942017,
"epoch": 0.19839395370807747,
"grad_norm": 35.20333705483616,
"learning_rate": 6.60377358490566e-07,
"logits": -1.4017753601074219,
"logps": -85.73289489746094,
"loss": 0.0551,
"objective": 0.059768859297037125,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5249999761581421,
"regularize": 0.059768859297037125,
"step": 70,
"wo_beta": 15.204180717468262
},
{
"dpo_loss": 0.6949416995048523,
"epoch": 0.21256495040151158,
"grad_norm": 35.61853042350494,
"learning_rate": 7.075471698113207e-07,
"logits": -1.321311593055725,
"logps": -85.34779357910156,
"loss": 0.0579,
"objective": 0.06061805784702301,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 0.06061805784702301,
"step": 75,
"wo_beta": 14.980683326721191
},
{
"dpo_loss": 0.6930631995201111,
"epoch": 0.22673594709494568,
"grad_norm": 34.9536345678453,
"learning_rate": 7.547169811320754e-07,
"logits": -1.4264112710952759,
"logps": -84.01344299316406,
"loss": 0.0626,
"objective": 0.062408361583948135,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5333333611488342,
"regularize": 0.062408361583948135,
"step": 80,
"wo_beta": 16.357084274291992
},
{
"dpo_loss": 0.6939026117324829,
"epoch": 0.2409069437883798,
"grad_norm": 35.4653089608865,
"learning_rate": 8.018867924528302e-07,
"logits": -1.4041804075241089,
"logps": -83.52224731445312,
"loss": 0.0695,
"objective": 0.07861108332872391,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5416666865348816,
"regularize": 0.07861108332872391,
"step": 85,
"wo_beta": 14.987756729125977
},
{
"dpo_loss": 0.6886675357818604,
"epoch": 0.25507794048181387,
"grad_norm": 38.910010820592774,
"learning_rate": 8.490566037735849e-07,
"logits": -1.5007805824279785,
"logps": -84.52466583251953,
"loss": 0.0806,
"objective": 0.08859896659851074,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5208333134651184,
"regularize": 0.08859896659851074,
"step": 90,
"wo_beta": 15.482732772827148
},
{
"dpo_loss": 0.6961393356323242,
"epoch": 0.269248937175248,
"grad_norm": 38.50762322649532,
"learning_rate": 8.962264150943396e-07,
"logits": -1.4152452945709229,
"logps": -83.7827377319336,
"loss": 0.0851,
"objective": 0.08412078768014908,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.47083333134651184,
"regularize": 0.08412078768014908,
"step": 95,
"wo_beta": 16.229019165039062
},
{
"dpo_loss": 0.6928918361663818,
"epoch": 0.2834199338686821,
"grad_norm": 34.07886171444254,
"learning_rate": 9.433962264150943e-07,
"logits": -1.2942625284194946,
"logps": -81.22164916992188,
"loss": 0.0877,
"objective": 0.08352937549352646,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4833333194255829,
"regularize": 0.08352937549352646,
"step": 100,
"wo_beta": 15.187151908874512
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6921994090080261,
"eval_logits": -1.3862521648406982,
"eval_logps": -88.66019439697266,
"eval_loss": 0.04334083944559097,
"eval_objective": 0.04473063722252846,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 0.04473063722252846,
"eval_runtime": 472.2375,
"eval_samples_per_second": 12.261,
"eval_steps_per_second": 1.023,
"eval_wo_beta": 16.16818618774414,
"step": 100
},
{
"dpo_loss": 0.6989858150482178,
"epoch": 0.2975909305621162,
"grad_norm": 34.124768906394316,
"learning_rate": 9.90566037735849e-07,
"logits": -1.4883809089660645,
"logps": -83.63202667236328,
"loss": 0.0937,
"objective": 0.10326550155878067,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.10326550155878067,
"step": 105,
"wo_beta": 14.697186470031738
},
{
"dpo_loss": 0.6916998624801636,
"epoch": 0.3117619272555503,
"grad_norm": 36.53960499520599,
"learning_rate": 9.99956257238817e-07,
"logits": -1.3666936159133911,
"logps": -82.67723083496094,
"loss": 0.1009,
"objective": 0.09831760078668594,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.09831760078668594,
"step": 110,
"wo_beta": 14.75289249420166
},
{
"dpo_loss": 0.6972029805183411,
"epoch": 0.32593292394898443,
"grad_norm": 32.3431868996238,
"learning_rate": 9.997785653888834e-07,
"logits": -1.351915955543518,
"logps": -82.5732650756836,
"loss": 0.1062,
"objective": 0.10171337425708771,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5083333253860474,
"regularize": 0.10171337425708771,
"step": 115,
"wo_beta": 16.003950119018555
},
{
"dpo_loss": 0.6885399222373962,
"epoch": 0.3401039206424185,
"grad_norm": 35.92878266852989,
"learning_rate": 9.994642390694308e-07,
"logits": -1.367909550666809,
"logps": -82.90719604492188,
"loss": 0.1098,
"objective": 0.11067435145378113,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 0.11067435145378113,
"step": 120,
"wo_beta": 15.639138221740723
},
{
"dpo_loss": 0.6936843395233154,
"epoch": 0.35427491733585265,
"grad_norm": 30.26276247254467,
"learning_rate": 9.990133642141357e-07,
"logits": -1.3929860591888428,
"logps": -85.65290069580078,
"loss": 0.1056,
"objective": 0.11743973940610886,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4749999940395355,
"regularize": 0.11743973940610886,
"step": 125,
"wo_beta": 15.93514633178711
},
{
"dpo_loss": 0.6941003799438477,
"epoch": 0.3684459140292867,
"grad_norm": 39.21461417787312,
"learning_rate": 9.98426064087682e-07,
"logits": -1.3525993824005127,
"logps": -83.56419372558594,
"loss": 0.1211,
"objective": 0.11899420619010925,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.5833333134651184,
"regularize": 0.11899420619010925,
"step": 130,
"wo_beta": 16.0157527923584
},
{
"dpo_loss": 0.6882577538490295,
"epoch": 0.3826169107227208,
"grad_norm": 32.67768184928008,
"learning_rate": 9.977024992520601e-07,
"logits": -1.3901729583740234,
"logps": -84.39146423339844,
"loss": 0.1253,
"objective": 0.12414517998695374,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.12414517998695374,
"step": 135,
"wo_beta": 14.371219635009766
},
{
"dpo_loss": 0.6830641627311707,
"epoch": 0.39678790741615494,
"grad_norm": 33.07732649314307,
"learning_rate": 9.968428675226713e-07,
"logits": -1.3437649011611938,
"logps": -85.44697570800781,
"loss": 0.1248,
"objective": 0.12058641016483307,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5375000238418579,
"regularize": 0.12058641016483307,
"step": 140,
"wo_beta": 14.547070503234863
},
{
"dpo_loss": 0.6851420998573303,
"epoch": 0.410958904109589,
"grad_norm": 30.784646211601874,
"learning_rate": 9.958474039142469e-07,
"logits": -1.3567951917648315,
"logps": -86.4469223022461,
"loss": 0.1319,
"objective": 0.13056445121765137,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5625,
"regularize": 0.13056445121765137,
"step": 145,
"wo_beta": 13.91884994506836
},
{
"dpo_loss": 0.6960374116897583,
"epoch": 0.42512990080302315,
"grad_norm": 32.05337681597037,
"learning_rate": 9.947163805765979e-07,
"logits": -1.3565360307693481,
"logps": -86.30919647216797,
"loss": 0.1323,
"objective": 0.12925057113170624,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5375000238418579,
"regularize": 0.12925057113170624,
"step": 150,
"wo_beta": 16.796695709228516
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.695567786693573,
"eval_logits": -1.3053797483444214,
"eval_logps": -90.43773651123047,
"eval_loss": 0.07677316665649414,
"eval_objective": 0.07639209181070328,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5222567319869995,
"eval_regularize": 0.07639209181070328,
"eval_runtime": 526.1958,
"eval_samples_per_second": 11.004,
"eval_steps_per_second": 0.918,
"eval_wo_beta": 16.003387451171875,
"step": 150
},
{
"dpo_loss": 0.6933045983314514,
"epoch": 0.43930089749645723,
"grad_norm": 31.605620123374155,
"learning_rate": 9.934501067202117e-07,
"logits": -1.3933676481246948,
"logps": -83.03238677978516,
"loss": 0.1358,
"objective": 0.1285211592912674,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4791666567325592,
"regularize": 0.1285211592912674,
"step": 155,
"wo_beta": 15.31113338470459
},
{
"dpo_loss": 0.6946766972541809,
"epoch": 0.45347189418989137,
"grad_norm": 32.22880904067845,
"learning_rate": 9.92048928531717e-07,
"logits": -1.2931861877441406,
"logps": -83.0308837890625,
"loss": 0.1338,
"objective": 0.12377996742725372,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5416666865348816,
"regularize": 0.12377996742725372,
"step": 160,
"wo_beta": 14.51412296295166
},
{
"dpo_loss": 0.6858457326889038,
"epoch": 0.46764289088332545,
"grad_norm": 28.56289647538006,
"learning_rate": 9.905132290792392e-07,
"logits": -1.3845534324645996,
"logps": -84.35334777832031,
"loss": 0.1295,
"objective": 0.13048619031906128,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.13048619031906128,
"step": 165,
"wo_beta": 15.858311653137207
},
{
"dpo_loss": 0.6987485289573669,
"epoch": 0.4818138875767596,
"grad_norm": 31.697158183348822,
"learning_rate": 9.888434282076757e-07,
"logits": -1.3974741697311401,
"logps": -82.40156555175781,
"loss": 0.1376,
"objective": 0.14300216734409332,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 0.14300216734409332,
"step": 170,
"wo_beta": 15.730175018310547
},
{
"dpo_loss": 0.6993536353111267,
"epoch": 0.49598488427019366,
"grad_norm": 30.951333756278135,
"learning_rate": 9.870399824239114e-07,
"logits": -1.2470077276229858,
"logps": -83.35051727294922,
"loss": 0.1401,
"objective": 0.13475559651851654,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 0.13475559651851654,
"step": 175,
"wo_beta": 17.82953643798828
},
{
"dpo_loss": 0.6983634233474731,
"epoch": 0.5101558809636277,
"grad_norm": 34.822921079044,
"learning_rate": 9.851033847720164e-07,
"logits": -1.2282413244247437,
"logps": -83.51294708251953,
"loss": 0.1442,
"objective": 0.143393412232399,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4625000059604645,
"regularize": 0.143393412232399,
"step": 180,
"wo_beta": 14.920221328735352
},
{
"dpo_loss": 0.6972795128822327,
"epoch": 0.5243268776570619,
"grad_norm": 34.3447207787113,
"learning_rate": 9.83034164698452e-07,
"logits": -1.2574915885925293,
"logps": -82.5478515625,
"loss": 0.1382,
"objective": 0.14230893552303314,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4749999940395355,
"regularize": 0.14230893552303314,
"step": 185,
"wo_beta": 14.194059371948242
},
{
"dpo_loss": 0.6978750824928284,
"epoch": 0.538497874350496,
"grad_norm": 34.00712851830173,
"learning_rate": 9.808328879073251e-07,
"logits": -1.2612725496292114,
"logps": -81.91997528076172,
"loss": 0.1466,
"objective": 0.14948724210262299,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.14948724210262299,
"step": 190,
"wo_beta": 16.620363235473633
},
{
"dpo_loss": 0.6822370290756226,
"epoch": 0.5526688710439301,
"grad_norm": 31.586658287520144,
"learning_rate": 9.78500156205731e-07,
"logits": -1.2822577953338623,
"logps": -83.0813217163086,
"loss": 0.1319,
"objective": 0.13207347691059113,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5333333611488342,
"regularize": 0.13207347691059113,
"step": 195,
"wo_beta": 14.693647384643555
},
{
"dpo_loss": 0.7044106721878052,
"epoch": 0.5668398677373642,
"grad_norm": 30.369620708498754,
"learning_rate": 9.760366073392244e-07,
"logits": -1.3258157968521118,
"logps": -83.32820129394531,
"loss": 0.1427,
"objective": 0.15046708285808563,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5249999761581421,
"regularize": 0.15046708285808563,
"step": 200,
"wo_beta": 15.960111618041992
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6959174871444702,
"eval_logits": -1.3123745918273926,
"eval_logps": -88.34333801269531,
"eval_loss": 0.10319730639457703,
"eval_objective": 0.10169863700866699,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5222567319869995,
"eval_regularize": 0.10169863700866699,
"eval_runtime": 532.3008,
"eval_samples_per_second": 10.877,
"eval_steps_per_second": 0.907,
"eval_wo_beta": 15.992826461791992,
"step": 200
},
{
"dpo_loss": 0.7000283598899841,
"epoch": 0.5810108644307983,
"grad_norm": 30.076737378719095,
"learning_rate": 9.734429148174674e-07,
"logits": -1.2141386270523071,
"logps": -82.74073028564453,
"loss": 0.1484,
"objective": 0.1470470279455185,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.49166667461395264,
"regularize": 0.1470470279455185,
"step": 205,
"wo_beta": 16.118446350097656
},
{
"dpo_loss": 0.6862087249755859,
"epoch": 0.5951818611242324,
"grad_norm": 31.36222267459615,
"learning_rate": 9.707197877300973e-07,
"logits": -1.2483521699905396,
"logps": -82.3885269165039,
"loss": 0.1454,
"objective": 0.14993111789226532,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5291666388511658,
"regularize": 0.14993111789226532,
"step": 210,
"wo_beta": 15.07961654663086
},
{
"dpo_loss": 0.6946883797645569,
"epoch": 0.6093528578176665,
"grad_norm": 32.25125352651472,
"learning_rate": 9.678679705528698e-07,
"logits": -1.3168671131134033,
"logps": -82.3456039428711,
"loss": 0.1384,
"objective": 0.14188070595264435,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.4791666567325592,
"regularize": 0.14188070595264435,
"step": 215,
"wo_beta": 16.104469299316406
},
{
"dpo_loss": 0.7026723027229309,
"epoch": 0.6235238545111006,
"grad_norm": 30.142053540661294,
"learning_rate": 9.648882429441256e-07,
"logits": -1.3188337087631226,
"logps": -82.63532257080078,
"loss": 0.1477,
"objective": 0.1607874184846878,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.1607874184846878,
"step": 220,
"wo_beta": 17.079347610473633
},
{
"dpo_loss": 0.6998167634010315,
"epoch": 0.6376948512045347,
"grad_norm": 29.418648888160003,
"learning_rate": 9.61781419531641e-07,
"logits": -1.3314566612243652,
"logps": -82.72489929199219,
"loss": 0.1465,
"objective": 0.14282181859016418,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.4958333373069763,
"regularize": 0.14282181859016418,
"step": 225,
"wo_beta": 15.506386756896973
},
{
"dpo_loss": 0.7007436156272888,
"epoch": 0.6518658478979689,
"grad_norm": 31.584769522955447,
"learning_rate": 9.585483496899149e-07,
"logits": -1.2612279653549194,
"logps": -82.21707916259766,
"loss": 0.1434,
"objective": 0.14342841506004333,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5583333373069763,
"regularize": 0.14342841506004333,
"step": 230,
"wo_beta": 16.431724548339844
},
{
"dpo_loss": 0.7085835337638855,
"epoch": 0.6660368445914029,
"grad_norm": 33.138665174716316,
"learning_rate": 9.551899173079606e-07,
"logits": -1.2083913087844849,
"logps": -84.15171813964844,
"loss": 0.1479,
"objective": 0.14772751927375793,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5291666388511658,
"regularize": 0.14772751927375793,
"step": 235,
"wo_beta": 15.722906112670898
},
{
"dpo_loss": 0.6893501877784729,
"epoch": 0.680207841284837,
"grad_norm": 28.511782322472136,
"learning_rate": 9.517070405476574e-07,
"logits": -1.3556396961212158,
"logps": -83.491943359375,
"loss": 0.1408,
"objective": 0.1575685441493988,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 0.1575685441493988,
"step": 240,
"wo_beta": 15.692626953125
},
{
"dpo_loss": 0.6901037693023682,
"epoch": 0.6943788379782712,
"grad_norm": 28.887977273452503,
"learning_rate": 9.481006715927351e-07,
"logits": -1.3499360084533691,
"logps": -82.59223937988281,
"loss": 0.1422,
"objective": 0.1397981345653534,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5416666865348816,
"regularize": 0.1397981345653534,
"step": 245,
"wo_beta": 15.627848625183105
},
{
"dpo_loss": 0.6898453831672668,
"epoch": 0.7085498346717053,
"grad_norm": 30.778123472149638,
"learning_rate": 9.443717963884568e-07,
"logits": -1.1249743700027466,
"logps": -81.38602447509766,
"loss": 0.1451,
"objective": 0.12806275486946106,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.12806275486946106,
"step": 250,
"wo_beta": 14.860217094421387
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.6950441002845764,
"eval_logits": -1.2854480743408203,
"eval_logps": -88.06980895996094,
"eval_loss": 0.11781599372625351,
"eval_objective": 0.11854107677936554,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 0.11854107677936554,
"eval_runtime": 533.5732,
"eval_samples_per_second": 10.851,
"eval_steps_per_second": 0.905,
"eval_wo_beta": 15.787796020507812,
"step": 250
},
{
"dpo_loss": 0.6893075704574585,
"epoch": 0.7227208313651393,
"grad_norm": 27.48861543576658,
"learning_rate": 9.405214343720706e-07,
"logits": -1.3376212120056152,
"logps": -81.39327239990234,
"loss": 0.1325,
"objective": 0.12804514169692993,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.42916667461395264,
"ranking_simple": 0.44583332538604736,
"regularize": 0.12804514169692993,
"step": 255,
"wo_beta": 14.828557968139648
},
{
"dpo_loss": 0.6919839978218079,
"epoch": 0.7368918280585735,
"grad_norm": 27.470977695013012,
"learning_rate": 9.365506381941065e-07,
"logits": -1.3046835660934448,
"logps": -83.32947540283203,
"loss": 0.1509,
"objective": 0.15500593185424805,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.4583333432674408,
"regularize": 0.15500593185424805,
"step": 260,
"wo_beta": 15.419398307800293
},
{
"dpo_loss": 0.6987964510917664,
"epoch": 0.7510628247520076,
"grad_norm": 29.786537519342414,
"learning_rate": 9.32460493430591e-07,
"logits": -1.2736799716949463,
"logps": -82.46897888183594,
"loss": 0.1444,
"objective": 0.14515246450901031,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5708333253860474,
"regularize": 0.14515246450901031,
"step": 265,
"wo_beta": 15.908428192138672
},
{
"dpo_loss": 0.6944437026977539,
"epoch": 0.7652338214454416,
"grad_norm": 28.172549175339846,
"learning_rate": 9.282521182862629e-07,
"logits": -1.397876262664795,
"logps": -82.14982604980469,
"loss": 0.1491,
"objective": 0.15289539098739624,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.4541666805744171,
"regularize": 0.15289539098739624,
"step": 270,
"wo_beta": 14.118414878845215
},
{
"dpo_loss": 0.6878421902656555,
"epoch": 0.7794048181388757,
"grad_norm": 30.974249065309053,
"learning_rate": 9.239266632888658e-07,
"logits": -1.265884280204773,
"logps": -80.5745849609375,
"loss": 0.1429,
"objective": 0.13965575397014618,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 0.13965575397014618,
"step": 275,
"wo_beta": 15.147540092468262
},
{
"dpo_loss": 0.693124532699585,
"epoch": 0.7935758148323099,
"grad_norm": 27.26309671203667,
"learning_rate": 9.194853109746072e-07,
"logits": -1.317248821258545,
"logps": -80.71721649169922,
"loss": 0.1422,
"objective": 0.13741357624530792,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.13741357624530792,
"step": 280,
"wo_beta": 15.141572952270508
},
{
"dpo_loss": 0.6898981332778931,
"epoch": 0.807746811525744,
"grad_norm": 29.618387771117387,
"learning_rate": 9.14929275564863e-07,
"logits": -1.2990264892578125,
"logps": -81.34524536132812,
"loss": 0.1481,
"objective": 0.14202959835529327,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4749999940395355,
"regularize": 0.14202959835529327,
"step": 285,
"wo_beta": 16.715734481811523
},
{
"dpo_loss": 0.6989319920539856,
"epoch": 0.821917808219178,
"grad_norm": 30.35546225687188,
"learning_rate": 9.102598026342222e-07,
"logits": -1.310984492301941,
"logps": -80.47208404541016,
"loss": 0.1416,
"objective": 0.13658234477043152,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 0.13658234477043152,
"step": 290,
"wo_beta": 15.537436485290527
},
{
"dpo_loss": 0.692668080329895,
"epoch": 0.8360888049126122,
"grad_norm": 28.386489735858774,
"learning_rate": 9.0547816876996e-07,
"logits": -1.3056447505950928,
"logps": -80.58573913574219,
"loss": 0.1335,
"objective": 0.14200052618980408,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.48750001192092896,
"regularize": 0.14200052618980408,
"step": 295,
"wo_beta": 15.984179496765137
},
{
"dpo_loss": 0.6959550380706787,
"epoch": 0.8502598016060463,
"grad_norm": 30.90903589796416,
"learning_rate": 9.005856812230304e-07,
"logits": -1.2770187854766846,
"logps": -79.3738784790039,
"loss": 0.1305,
"objective": 0.12751255929470062,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5541666746139526,
"regularize": 0.12751255929470062,
"step": 300,
"wo_beta": 14.3499755859375
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6960889101028442,
"eval_logits": -1.2862635850906372,
"eval_logps": -86.33123016357422,
"eval_loss": 0.12468627840280533,
"eval_objective": 0.1251634955406189,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5279502868652344,
"eval_regularize": 0.1251634955406189,
"eval_runtime": 492.3852,
"eval_samples_per_second": 11.759,
"eval_steps_per_second": 0.981,
"eval_wo_beta": 15.766751289367676,
"step": 300
},
{
"dpo_loss": 0.6915071606636047,
"epoch": 0.8644307982994804,
"grad_norm": 28.35320542673635,
"learning_rate": 8.955836775506775e-07,
"logits": -1.2531558275222778,
"logps": -80.3687744140625,
"loss": 0.1326,
"objective": 0.1348031610250473,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 0.1348031610250473,
"step": 305,
"wo_beta": 15.622274398803711
},
{
"dpo_loss": 0.6971884965896606,
"epoch": 0.8786017949929145,
"grad_norm": 28.116582054859066,
"learning_rate": 8.904735252507609e-07,
"logits": -1.256584882736206,
"logps": -79.94914245605469,
"loss": 0.1365,
"objective": 0.1369226723909378,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4958333373069763,
"regularize": 0.1369226723909378,
"step": 310,
"wo_beta": 14.816594123840332
},
{
"dpo_loss": 0.6855903267860413,
"epoch": 0.8927727916863486,
"grad_norm": 29.897768012112312,
"learning_rate": 8.852566213878946e-07,
"logits": -1.2702066898345947,
"logps": -79.8655014038086,
"loss": 0.1353,
"objective": 0.13145793974399567,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4749999940395355,
"regularize": 0.13145793974399567,
"step": 315,
"wo_beta": 15.161810874938965
},
{
"dpo_loss": 0.691845178604126,
"epoch": 0.9069437883797827,
"grad_norm": 28.736143424115674,
"learning_rate": 8.799343922115043e-07,
"logits": -1.2241441011428833,
"logps": -82.17134094238281,
"loss": 0.13,
"objective": 0.1402612328529358,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5541666746139526,
"regularize": 0.1402612328529358,
"step": 320,
"wo_beta": 15.099017143249512
},
{
"dpo_loss": 0.6962689161300659,
"epoch": 0.9211147850732169,
"grad_norm": 28.356303375759392,
"learning_rate": 8.745082927659046e-07,
"logits": -1.2910945415496826,
"logps": -83.30491638183594,
"loss": 0.1308,
"objective": 0.14350637793540955,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.14350637793540955,
"step": 325,
"wo_beta": 15.133590698242188
},
{
"dpo_loss": 0.6975868344306946,
"epoch": 0.9352857817666509,
"grad_norm": 29.00689810312343,
"learning_rate": 8.689798064925048e-07,
"logits": -1.1349345445632935,
"logps": -82.04910278320312,
"loss": 0.1321,
"objective": 0.1296585500240326,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.1296585500240326,
"step": 330,
"wo_beta": 16.1423282623291
},
{
"dpo_loss": 0.7005541920661926,
"epoch": 0.949456778460085,
"grad_norm": 32.3756572284601,
"learning_rate": 8.633504448242504e-07,
"logits": -1.149806261062622,
"logps": -81.64175415039062,
"loss": 0.1375,
"objective": 0.1390267014503479,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.1390267014503479,
"step": 335,
"wo_beta": 15.652006149291992
},
{
"dpo_loss": 0.6950960755348206,
"epoch": 0.9636277751535192,
"grad_norm": 27.122604040368284,
"learning_rate": 8.576217467724127e-07,
"logits": -1.2132624387741089,
"logps": -80.64006042480469,
"loss": 0.1292,
"objective": 0.12200692296028137,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5416666865348816,
"regularize": 0.12200692296028137,
"step": 340,
"wo_beta": 15.907822608947754
},
{
"dpo_loss": 0.6975562572479248,
"epoch": 0.9777987718469532,
"grad_norm": 27.134170349804087,
"learning_rate": 8.517952785058384e-07,
"logits": -1.2632955312728882,
"logps": -80.71128845214844,
"loss": 0.1283,
"objective": 0.11938898265361786,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.5708333253860474,
"regularize": 0.11938898265361786,
"step": 345,
"wo_beta": 14.762292861938477
},
{
"dpo_loss": 0.6852299571037292,
"epoch": 0.9919697685403873,
"grad_norm": 27.658996359022336,
"learning_rate": 8.458726329227747e-07,
"logits": -1.1914026737213135,
"logps": -81.73149108886719,
"loss": 0.1407,
"objective": 0.1554519683122635,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.574999988079071,
"regularize": 0.1554519683122635,
"step": 350,
"wo_beta": 15.107101440429688
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.6975587606430054,
"eval_logits": -1.2756990194320679,
"eval_logps": -86.45014190673828,
"eval_loss": 0.13138790428638458,
"eval_objective": 0.13096390664577484,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5222567319869995,
"eval_regularize": 0.13096390664577484,
"eval_runtime": 498.5276,
"eval_samples_per_second": 11.614,
"eval_steps_per_second": 0.969,
"eval_wo_beta": 15.656978607177734,
"step": 350
},
{
"dpo_loss": 0.6982021331787109,
"epoch": 1.0061407652338215,
"grad_norm": 28.652193663332632,
"learning_rate": 8.398554292153865e-07,
"logits": -1.3350815773010254,
"logps": -79.34367370605469,
"loss": 0.1274,
"objective": 0.1257932186126709,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5333333611488342,
"regularize": 0.1257932186126709,
"step": 355,
"wo_beta": 16.378000259399414
},
{
"dpo_loss": 0.6944258809089661,
"epoch": 1.0203117619272555,
"grad_norm": 27.709591206743504,
"learning_rate": 8.337453124270862e-07,
"logits": -1.2474267482757568,
"logps": -80.31254577636719,
"loss": 0.1453,
"objective": 0.14443162083625793,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.4958333373069763,
"regularize": 0.14443162083625793,
"step": 360,
"wo_beta": 16.190935134887695
},
{
"dpo_loss": 0.6928178071975708,
"epoch": 1.0344827586206897,
"grad_norm": 31.070681767199403,
"learning_rate": 8.275439530027947e-07,
"logits": -1.276475191116333,
"logps": -80.50602722167969,
"loss": 0.1371,
"objective": 0.13979977369308472,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.13979977369308472,
"step": 365,
"wo_beta": 14.378859519958496
},
{
"dpo_loss": 0.699609637260437,
"epoch": 1.0486537553141237,
"grad_norm": 30.003574042191506,
"learning_rate": 8.212530463322582e-07,
"logits": -1.2496185302734375,
"logps": -79.11912536621094,
"loss": 0.1306,
"objective": 0.1423943042755127,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.512499988079071,
"regularize": 0.1423943042755127,
"step": 370,
"wo_beta": 15.124627113342285
},
{
"dpo_loss": 0.6967942714691162,
"epoch": 1.0628247520075578,
"grad_norm": 27.953352635424668,
"learning_rate": 8.148743122865463e-07,
"logits": -1.3011940717697144,
"logps": -80.02760314941406,
"loss": 0.1297,
"objective": 0.11541719734668732,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5249999761581421,
"regularize": 0.11541719734668732,
"step": 375,
"wo_beta": 15.568713188171387
},
{
"dpo_loss": 0.6877638697624207,
"epoch": 1.076995748700992,
"grad_norm": 27.32675287386393,
"learning_rate": 8.084094947478554e-07,
"logits": -1.2718795537948608,
"logps": -81.57784271240234,
"loss": 0.1306,
"objective": 0.1403437703847885,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.574999988079071,
"regularize": 0.1403437703847885,
"step": 380,
"wo_beta": 15.024064064025879
},
{
"dpo_loss": 0.7029018402099609,
"epoch": 1.091166745394426,
"grad_norm": 25.199092121516863,
"learning_rate": 8.018603611327504e-07,
"logits": -1.2051031589508057,
"logps": -80.49242401123047,
"loss": 0.1289,
"objective": 0.12692388892173767,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5375000238418579,
"regularize": 0.12692388892173767,
"step": 385,
"wo_beta": 15.658522605895996
},
{
"dpo_loss": 0.6920034885406494,
"epoch": 1.10533774208786,
"grad_norm": 28.52425339340298,
"learning_rate": 7.952287019089685e-07,
"logits": -1.1542584896087646,
"logps": -82.2014389038086,
"loss": 0.128,
"objective": 0.13050222396850586,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.13050222396850586,
"step": 390,
"wo_beta": 15.990551948547363
},
{
"dpo_loss": 0.6983939409255981,
"epoch": 1.1195087387812943,
"grad_norm": 28.676328293583875,
"learning_rate": 7.88516330105925e-07,
"logits": -1.21431303024292,
"logps": -81.3152847290039,
"loss": 0.1271,
"objective": 0.12024066597223282,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 0.12024066597223282,
"step": 395,
"wo_beta": 14.856566429138184
},
{
"dpo_loss": 0.7045825719833374,
"epoch": 1.1336797354747283,
"grad_norm": 26.361954924055155,
"learning_rate": 7.817250808190483e-07,
"logits": -1.2783249616622925,
"logps": -79.67323303222656,
"loss": 0.1245,
"objective": 0.12074790149927139,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 0.12074790149927139,
"step": 400,
"wo_beta": 15.344539642333984
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.698018491268158,
"eval_logits": -1.2417831420898438,
"eval_logps": -86.2849349975586,
"eval_loss": 0.13988268375396729,
"eval_objective": 0.13904725015163422,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 0.13904725015163422,
"eval_runtime": 544.4211,
"eval_samples_per_second": 10.635,
"eval_steps_per_second": 0.887,
"eval_wo_beta": 15.614696502685547,
"step": 400
},
{
"dpo_loss": 0.6944829225540161,
"epoch": 1.1478507321681626,
"grad_norm": 26.829396266860115,
"learning_rate": 7.74856810708083e-07,
"logits": -1.2358256578445435,
"logps": -80.91136169433594,
"loss": 0.1252,
"objective": 0.13733495771884918,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.46666666865348816,
"regularize": 0.13733495771884918,
"step": 405,
"wo_beta": 16.799846649169922
},
{
"dpo_loss": 0.6951694488525391,
"epoch": 1.1620217288615966,
"grad_norm": 25.84880624163644,
"learning_rate": 7.679133974894982e-07,
"logits": -1.2413955926895142,
"logps": -80.84453582763672,
"loss": 0.1146,
"objective": 0.10967493802309036,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.512499988079071,
"regularize": 0.10967493802309036,
"step": 410,
"wo_beta": 16.299657821655273
},
{
"dpo_loss": 0.6816955804824829,
"epoch": 1.1761927255550306,
"grad_norm": 28.539266676030703,
"learning_rate": 7.608967394231386e-07,
"logits": -1.1460075378417969,
"logps": -80.07962799072266,
"loss": 0.1201,
"objective": 0.11568634957075119,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.512499988079071,
"regularize": 0.11568634957075119,
"step": 415,
"wo_beta": 15.849366188049316
},
{
"dpo_loss": 0.6958954334259033,
"epoch": 1.1903637222484649,
"grad_norm": 26.83226072322417,
"learning_rate": 7.538087547932584e-07,
"logits": -1.1252403259277344,
"logps": -80.94552612304688,
"loss": 0.1212,
"objective": 0.11827482283115387,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5083333253860474,
"regularize": 0.11827482283115387,
"step": 420,
"wo_beta": 16.14940643310547
},
{
"dpo_loss": 0.6887015700340271,
"epoch": 1.204534718941899,
"grad_norm": 26.51780573149761,
"learning_rate": 7.466513813840824e-07,
"logits": -1.1933962106704712,
"logps": -78.89797973632812,
"loss": 0.1135,
"objective": 0.1143736019730568,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5166666507720947,
"regularize": 0.1143736019730568,
"step": 425,
"wo_beta": 14.578470230102539
},
{
"dpo_loss": 0.6991615891456604,
"epoch": 1.2187057156353331,
"grad_norm": 27.122286588814305,
"learning_rate": 7.394265759500347e-07,
"logits": -1.1930339336395264,
"logps": -80.126220703125,
"loss": 0.1127,
"objective": 0.11676573753356934,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4749999940395355,
"regularize": 0.11676573753356934,
"step": 430,
"wo_beta": 15.9819974899292
},
{
"dpo_loss": 0.6940677762031555,
"epoch": 1.2328767123287672,
"grad_norm": 26.542064973728884,
"learning_rate": 7.321363136807818e-07,
"logits": -1.1478148698806763,
"logps": -80.018310546875,
"loss": 0.1273,
"objective": 0.12024448066949844,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.6000000238418579,
"regularize": 0.12024448066949844,
"step": 435,
"wo_beta": 17.044919967651367
},
{
"dpo_loss": 0.6969379186630249,
"epoch": 1.2470477090222012,
"grad_norm": 28.251093862423456,
"learning_rate": 7.247825876612352e-07,
"logits": -1.1687721014022827,
"logps": -79.19255828857422,
"loss": 0.1253,
"objective": 0.12027813494205475,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.550000011920929,
"regularize": 0.12027813494205475,
"step": 440,
"wo_beta": 14.654241561889648
},
{
"dpo_loss": 0.6925280094146729,
"epoch": 1.2612187057156352,
"grad_norm": 26.524957115429544,
"learning_rate": 7.173674083266623e-07,
"logits": -1.1623238325119019,
"logps": -80.57234191894531,
"loss": 0.1123,
"objective": 0.11110406368970871,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5416666865348816,
"regularize": 0.11110406368970871,
"step": 445,
"wo_beta": 16.533472061157227
},
{
"dpo_loss": 0.6959200501441956,
"epoch": 1.2753897024090695,
"grad_norm": 25.817109114436615,
"learning_rate": 7.098928029130528e-07,
"logits": -1.2953335046768188,
"logps": -80.17058563232422,
"loss": 0.1163,
"objective": 0.11630918085575104,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.11630918085575104,
"step": 450,
"wo_beta": 14.598237991333008
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.6984797716140747,
"eval_logits": -1.2306897640228271,
"eval_logps": -85.48281860351562,
"eval_loss": 0.14205217361450195,
"eval_objective": 0.14207439124584198,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5274327397346497,
"eval_regularize": 0.14207439124584198,
"eval_runtime": 502.3414,
"eval_samples_per_second": 11.526,
"eval_steps_per_second": 0.961,
"eval_wo_beta": 15.61281681060791,
"step": 450
},
{
"dpo_loss": 0.6977149248123169,
"epoch": 1.2895606991025035,
"grad_norm": 29.01794849451687,
"learning_rate": 7.023608149028936e-07,
"logits": -1.1321525573730469,
"logps": -79.79704284667969,
"loss": 0.1102,
"objective": 0.10798730701208115,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4958333373069763,
"regularize": 0.10798730701208115,
"step": 455,
"wo_beta": 14.988642692565918
},
{
"dpo_loss": 0.6960881352424622,
"epoch": 1.3037316957959377,
"grad_norm": 25.82316278857825,
"learning_rate": 6.947735034665001e-07,
"logits": -1.2272473573684692,
"logps": -79.4093246459961,
"loss": 0.1071,
"objective": 0.10132616013288498,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.48750001192092896,
"regularize": 0.10132616013288498,
"step": 460,
"wo_beta": 15.888258934020996
},
{
"dpo_loss": 0.6906372308731079,
"epoch": 1.3179026924893718,
"grad_norm": 30.635018246102483,
"learning_rate": 6.871329428990601e-07,
"logits": -1.2102056741714478,
"logps": -78.2228775024414,
"loss": 0.1131,
"objective": 0.11604170501232147,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.46666666865348816,
"regularize": 0.11604170501232147,
"step": 465,
"wo_beta": 14.311129570007324
},
{
"dpo_loss": 0.700882077217102,
"epoch": 1.3320736891828058,
"grad_norm": 27.46778566417897,
"learning_rate": 6.794412220535425e-07,
"logits": -1.2833130359649658,
"logps": -77.55262756347656,
"loss": 0.108,
"objective": 0.10955775529146194,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 0.10955775529146194,
"step": 470,
"wo_beta": 14.30273151397705
},
{
"dpo_loss": 0.6947088241577148,
"epoch": 1.34624468587624,
"grad_norm": 27.567991845029866,
"learning_rate": 6.717004437696249e-07,
"logits": -1.1878196001052856,
"logps": -79.7737808227539,
"loss": 0.1143,
"objective": 0.10682200640439987,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 0.10682200640439987,
"step": 475,
"wo_beta": 16.000301361083984
},
{
"dpo_loss": 0.7012575268745422,
"epoch": 1.360415682569674,
"grad_norm": 26.96292751307233,
"learning_rate": 6.639127242987987e-07,
"logits": -1.2194726467132568,
"logps": -79.7364730834961,
"loss": 0.1121,
"objective": 0.10879840701818466,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4958333373069763,
"regularize": 0.10879840701818466,
"step": 480,
"wo_beta": 17.723169326782227
},
{
"dpo_loss": 0.6902076005935669,
"epoch": 1.3745866792631083,
"grad_norm": 25.536217139623062,
"learning_rate": 6.560801927258079e-07,
"logits": -1.2140812873840332,
"logps": -77.77493286132812,
"loss": 0.1063,
"objective": 0.10283537954092026,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.4958333373069763,
"regularize": 0.10283537954092026,
"step": 485,
"wo_beta": 16.162378311157227
},
{
"dpo_loss": 0.7003743648529053,
"epoch": 1.3887576759565423,
"grad_norm": 25.340128312194377,
"learning_rate": 6.482049903865768e-07,
"logits": -1.1755324602127075,
"logps": -80.6698226928711,
"loss": 0.1065,
"objective": 0.11661101877689362,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5375000238418579,
"regularize": 0.11661101877689362,
"step": 490,
"wo_beta": 15.291964530944824
},
{
"dpo_loss": 0.6933376789093018,
"epoch": 1.4029286726499763,
"grad_norm": 27.42214588210337,
"learning_rate": 6.402892702827916e-07,
"logits": -1.203405499458313,
"logps": -81.71482849121094,
"loss": 0.1083,
"objective": 0.11117922514677048,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4833333194255829,
"regularize": 0.11117922514677048,
"step": 495,
"wo_beta": 15.19780445098877
},
{
"dpo_loss": 0.6919417977333069,
"epoch": 1.4170996693434104,
"grad_norm": 25.59099749967404,
"learning_rate": 6.323351964932908e-07,
"logits": -1.1464035511016846,
"logps": -80.67649841308594,
"loss": 0.1071,
"objective": 0.10751333087682724,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.49166667461395264,
"regularize": 0.10751333087682724,
"step": 500,
"wo_beta": 14.786382675170898
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6979657411575317,
"eval_logits": -1.2270138263702393,
"eval_logps": -87.26725006103516,
"eval_loss": 0.13817694783210754,
"eval_objective": 0.1376110315322876,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5284678936004639,
"eval_regularize": 0.1376110315322876,
"eval_runtime": 507.9058,
"eval_samples_per_second": 11.4,
"eval_steps_per_second": 0.951,
"eval_wo_beta": 15.64445686340332,
"step": 500
},
{
"dpo_loss": 0.6891559362411499,
"epoch": 1.4312706660368446,
"grad_norm": 24.27122577359571,
"learning_rate": 6.243449435824276e-07,
"logits": -1.2177590131759644,
"logps": -81.35147094726562,
"loss": 0.1101,
"objective": 0.1094871535897255,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.1094871535897255,
"step": 505,
"wo_beta": 15.79046630859375
},
{
"dpo_loss": 0.6941244602203369,
"epoch": 1.4454416627302786,
"grad_norm": 25.930769694740054,
"learning_rate": 6.163206960055652e-07,
"logits": -1.251134991645813,
"logps": -83.10639953613281,
"loss": 0.1006,
"objective": 0.09994279593229294,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5291666388511658,
"regularize": 0.09994279593229294,
"step": 510,
"wo_beta": 14.899516105651855
},
{
"dpo_loss": 0.6874905824661255,
"epoch": 1.4596126594237129,
"grad_norm": 26.602314880639124,
"learning_rate": 6.082646475118699e-07,
"logits": -1.2633229494094849,
"logps": -84.02688598632812,
"loss": 0.106,
"objective": 0.10199037194252014,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.4541666805744171,
"regularize": 0.10199037194252014,
"step": 515,
"wo_beta": 15.941681861877441
},
{
"dpo_loss": 0.6967552900314331,
"epoch": 1.473783656117147,
"grad_norm": 29.852612268822412,
"learning_rate": 6.001790005445606e-07,
"logits": -1.184912919998169,
"logps": -80.95891571044922,
"loss": 0.1071,
"objective": 0.10300089418888092,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.49166667461395264,
"regularize": 0.10300089418888092,
"step": 520,
"wo_beta": 15.731270790100098
},
{
"dpo_loss": 0.6896428465843201,
"epoch": 1.487954652810581,
"grad_norm": 25.111572790175902,
"learning_rate": 5.920659656387836e-07,
"logits": -1.0910202264785767,
"logps": -79.46784973144531,
"loss": 0.1087,
"objective": 0.10289794951677322,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.10289794951677322,
"step": 525,
"wo_beta": 14.915215492248535
},
{
"dpo_loss": 0.6884135603904724,
"epoch": 1.5021256495040152,
"grad_norm": 29.155836377588727,
"learning_rate": 5.839277608172738e-07,
"logits": -1.2429722547531128,
"logps": -82.09452056884766,
"loss": 0.1052,
"objective": 0.11081438511610031,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 0.11081438511610031,
"step": 530,
"wo_beta": 14.850537300109863
},
{
"dpo_loss": 0.6961663961410522,
"epoch": 1.5162966461974492,
"grad_norm": 25.320925581209725,
"learning_rate": 5.757666109839702e-07,
"logits": -1.2323859930038452,
"logps": -80.30747985839844,
"loss": 0.1001,
"objective": 0.09293892234563828,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 0.09293892234563828,
"step": 535,
"wo_beta": 15.262944221496582
},
{
"dpo_loss": 0.6923481225967407,
"epoch": 1.5304676428908834,
"grad_norm": 32.01848958383342,
"learning_rate": 5.675847473157485e-07,
"logits": -1.1209362745285034,
"logps": -80.81604766845703,
"loss": 0.1017,
"objective": 0.1114068478345871,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.1114068478345871,
"step": 540,
"wo_beta": 14.910977363586426
},
{
"dpo_loss": 0.6916370987892151,
"epoch": 1.5446386395843175,
"grad_norm": 25.57319909143034,
"learning_rate": 5.5938440665244e-07,
"logits": -1.2216829061508179,
"logps": -81.30005645751953,
"loss": 0.1016,
"objective": 0.09744974970817566,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.5791666507720947,
"regularize": 0.09744974970817566,
"step": 545,
"wo_beta": 14.310770988464355
},
{
"dpo_loss": 0.6908753514289856,
"epoch": 1.5588096362777515,
"grad_norm": 25.722462769354692,
"learning_rate": 5.511678308853025e-07,
"logits": -1.2278273105621338,
"logps": -81.18257141113281,
"loss": 0.1045,
"objective": 0.11294317990541458,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.44583332538604736,
"regularize": 0.11294317990541458,
"step": 550,
"wo_beta": 14.909473419189453
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.6977279186248779,
"eval_logits": -1.2327359914779663,
"eval_logps": -87.07755279541016,
"eval_loss": 0.1427639275789261,
"eval_objective": 0.14261718094348907,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5253623127937317,
"eval_regularize": 0.14261718094348907,
"eval_runtime": 530.6929,
"eval_samples_per_second": 10.91,
"eval_steps_per_second": 0.91,
"eval_wo_beta": 15.58066177368164,
"step": 550
},
{
"dpo_loss": 0.6979414820671082,
"epoch": 1.5729806329711855,
"grad_norm": 25.97117042381748,
"learning_rate": 5.429372663441085e-07,
"logits": -1.0773119926452637,
"logps": -80.85298919677734,
"loss": 0.0969,
"objective": 0.10372842103242874,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5083333253860474,
"regularize": 0.10372842103242874,
"step": 555,
"wo_beta": 14.278889656066895
},
{
"dpo_loss": 0.6877902746200562,
"epoch": 1.5871516296646198,
"grad_norm": 26.089498554586406,
"learning_rate": 5.34694963183022e-07,
"logits": -1.149969220161438,
"logps": -80.23606872558594,
"loss": 0.0937,
"objective": 0.0943736732006073,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5249999761581421,
"regularize": 0.0943736732006073,
"step": 560,
"wo_beta": 15.772320747375488
},
{
"dpo_loss": 0.6947767734527588,
"epoch": 1.601322626358054,
"grad_norm": 26.116747650931945,
"learning_rate": 5.264431747654283e-07,
"logits": -1.1340062618255615,
"logps": -81.63863372802734,
"loss": 0.0947,
"objective": 0.10096503049135208,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.4416666626930237,
"regularize": 0.10096503049135208,
"step": 565,
"wo_beta": 14.981669425964355
},
{
"dpo_loss": 0.692035436630249,
"epoch": 1.615493623051488,
"grad_norm": 30.63214010200871,
"learning_rate": 5.181841570478872e-07,
"logits": -1.2694156169891357,
"logps": -81.64689636230469,
"loss": 0.0959,
"objective": 0.1027316302061081,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5375000238418579,
"regularize": 0.1027316302061081,
"step": 570,
"wo_beta": 15.00640869140625
},
{
"dpo_loss": 0.6928724646568298,
"epoch": 1.629664619744922,
"grad_norm": 26.684109688489027,
"learning_rate": 5.099201679633768e-07,
"logits": -1.219287633895874,
"logps": -79.6671371459961,
"loss": 0.0902,
"objective": 0.08943381905555725,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5458333492279053,
"regularize": 0.08943381905555725,
"step": 575,
"wo_beta": 15.883743286132812
},
{
"dpo_loss": 0.7014293670654297,
"epoch": 1.643835616438356,
"grad_norm": 23.659415037737205,
"learning_rate": 5.016534668039976e-07,
"logits": -1.245025396347046,
"logps": -79.65864562988281,
"loss": 0.0922,
"objective": 0.09364978969097137,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5375000238418579,
"regularize": 0.09364978969097137,
"step": 580,
"wo_beta": 14.111478805541992
},
{
"dpo_loss": 0.6919021606445312,
"epoch": 1.6580066131317903,
"grad_norm": 26.81667336982406,
"learning_rate": 4.933863136033039e-07,
"logits": -1.1303011178970337,
"logps": -79.01573944091797,
"loss": 0.0912,
"objective": 0.09164983779191971,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.4749999940395355,
"regularize": 0.09164983779191971,
"step": 585,
"wo_beta": 15.943554878234863
},
{
"dpo_loss": 0.6958838701248169,
"epoch": 1.6721776098252243,
"grad_norm": 24.98087975104312,
"learning_rate": 4.851209685184338e-07,
"logits": -1.1811211109161377,
"logps": -78.23771667480469,
"loss": 0.0896,
"objective": 0.08815690129995346,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.49166667461395264,
"regularize": 0.08815690129995346,
"step": 590,
"wo_beta": 13.053691864013672
},
{
"dpo_loss": 0.7018415927886963,
"epoch": 1.6863486065186586,
"grad_norm": 28.222712089048155,
"learning_rate": 4.768596912122045e-07,
"logits": -1.1410056352615356,
"logps": -78.93828582763672,
"loss": 0.0867,
"objective": 0.08855770528316498,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5208333134651184,
"regularize": 0.08855770528316498,
"step": 595,
"wo_beta": 16.56429672241211
},
{
"dpo_loss": 0.6934791803359985,
"epoch": 1.7005196032120926,
"grad_norm": 26.368906194308657,
"learning_rate": 4.686047402353433e-07,
"logits": -1.1907525062561035,
"logps": -80.13634490966797,
"loss": 0.0866,
"objective": 0.09509587287902832,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5375000238418579,
"regularize": 0.09509587287902832,
"step": 600,
"wo_beta": 15.167766571044922
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6965176463127136,
"eval_logits": -1.2196165323257446,
"eval_logps": -85.19258880615234,
"eval_loss": 0.14236733317375183,
"eval_objective": 0.14079627394676208,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 0.14079627394676208,
"eval_runtime": 531.3996,
"eval_samples_per_second": 10.896,
"eval_steps_per_second": 0.909,
"eval_wo_beta": 15.660321235656738,
"step": 600
},
{
"dpo_loss": 0.6949159502983093,
"epoch": 1.7146905999055266,
"grad_norm": 27.65546942935795,
"learning_rate": 4.60358372409022e-07,
"logits": -1.135356068611145,
"logps": -80.58204650878906,
"loss": 0.0851,
"objective": 0.10239014774560928,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5166666507720947,
"regularize": 0.10239014774560928,
"step": 605,
"wo_beta": 17.19474220275879
},
{
"dpo_loss": 0.6975926160812378,
"epoch": 1.7288615965989607,
"grad_norm": 27.123580050770954,
"learning_rate": 4.521228422078649e-07,
"logits": -1.2206453084945679,
"logps": -78.68167877197266,
"loss": 0.0882,
"objective": 0.0891619473695755,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.5791666507720947,
"regularize": 0.0891619473695755,
"step": 610,
"wo_beta": 15.383539199829102
},
{
"dpo_loss": 0.6973095536231995,
"epoch": 1.743032593292395,
"grad_norm": 27.24577954816879,
"learning_rate": 4.439004011435979e-07,
"logits": -1.2362395524978638,
"logps": -79.0839614868164,
"loss": 0.0875,
"objective": 0.08598390221595764,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5166666507720947,
"regularize": 0.08598390221595764,
"step": 615,
"wo_beta": 15.571494102478027
},
{
"dpo_loss": 0.6836999654769897,
"epoch": 1.7572035899858292,
"grad_norm": 26.787162425144906,
"learning_rate": 4.3569329714950703e-07,
"logits": -1.2427488565444946,
"logps": -79.54029846191406,
"loss": 0.0838,
"objective": 0.08879180997610092,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.08879180997610092,
"step": 620,
"wo_beta": 15.718174934387207
},
{
"dpo_loss": 0.6966572999954224,
"epoch": 1.7713745866792632,
"grad_norm": 27.034118419678652,
"learning_rate": 4.275037739658771e-07,
"logits": -1.1582579612731934,
"logps": -78.85964965820312,
"loss": 0.0817,
"objective": 0.08299548178911209,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.08299548178911209,
"step": 625,
"wo_beta": 14.923952102661133
},
{
"dpo_loss": 0.688913881778717,
"epoch": 1.7855455833726972,
"grad_norm": 25.951621085094303,
"learning_rate": 4.193340705265745e-07,
"logits": -1.1893038749694824,
"logps": -80.92503356933594,
"loss": 0.0785,
"objective": 0.08198042213916779,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.48750001192092896,
"regularize": 0.08198042213916779,
"step": 630,
"wo_beta": 15.90516185760498
},
{
"dpo_loss": 0.6924195885658264,
"epoch": 1.7997165800661312,
"grad_norm": 26.232506833263244,
"learning_rate": 4.1118642034694565e-07,
"logits": -1.2785860300064087,
"logps": -79.61809539794922,
"loss": 0.0829,
"objective": 0.08000766485929489,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5416666865348816,
"regularize": 0.08000766485929489,
"step": 635,
"wo_beta": 15.796289443969727
},
{
"dpo_loss": 0.6959947943687439,
"epoch": 1.8138875767595655,
"grad_norm": 27.993983855367574,
"learning_rate": 4.030630509131959e-07,
"logits": -1.2194859981536865,
"logps": -80.71635437011719,
"loss": 0.0842,
"objective": 0.089814692735672,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5,
"regularize": 0.089814692735672,
"step": 640,
"wo_beta": 16.932401657104492
},
{
"dpo_loss": 0.6927257776260376,
"epoch": 1.8280585734529995,
"grad_norm": 28.107934645205802,
"learning_rate": 3.9496618307341713e-07,
"logits": -1.256467342376709,
"logps": -81.03665161132812,
"loss": 0.0853,
"objective": 0.0889531597495079,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5541666746139526,
"regularize": 0.0889531597495079,
"step": 645,
"wo_beta": 15.800675392150879
},
{
"dpo_loss": 0.6969668865203857,
"epoch": 1.8422295701464337,
"grad_norm": 27.015977070193543,
"learning_rate": 3.8689803043042996e-07,
"logits": -1.2903110980987549,
"logps": -80.92781829833984,
"loss": 0.0847,
"objective": 0.0801667794585228,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 0.0801667794585228,
"step": 650,
"wo_beta": 15.173321723937988
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.6974130868911743,
"eval_logits": -1.2229208946228027,
"eval_logps": -86.1129150390625,
"eval_loss": 0.1379525512456894,
"eval_objective": 0.13563887774944305,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5243270993232727,
"eval_regularize": 0.13563887774944305,
"eval_runtime": 538.8083,
"eval_samples_per_second": 10.746,
"eval_steps_per_second": 0.896,
"eval_wo_beta": 15.666037559509277,
"step": 650
},
{
"dpo_loss": 0.6896820068359375,
"epoch": 1.8564005668398678,
"grad_norm": 26.639855046988597,
"learning_rate": 3.788607987366069e-07,
"logits": -1.1662521362304688,
"logps": -78.9451675415039,
"loss": 0.081,
"objective": 0.08504978567361832,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.4958333373069763,
"regularize": 0.08504978567361832,
"step": 655,
"wo_beta": 15.233590126037598
},
{
"dpo_loss": 0.6891672611236572,
"epoch": 1.8705715635333018,
"grad_norm": 25.995274477757608,
"learning_rate": 3.708566852908418e-07,
"logits": -1.2193191051483154,
"logps": -81.20162200927734,
"loss": 0.0781,
"objective": 0.08211526274681091,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5291666388511658,
"regularize": 0.08211526274681091,
"step": 660,
"wo_beta": 16.372514724731445
},
{
"dpo_loss": 0.6908305287361145,
"epoch": 1.8847425602267358,
"grad_norm": 28.66146531985666,
"learning_rate": 3.6288787833783016e-07,
"logits": -1.2218626737594604,
"logps": -80.04493713378906,
"loss": 0.0815,
"objective": 0.08463230729103088,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5416666865348816,
"regularize": 0.08463230729103088,
"step": 665,
"wo_beta": 15.034836769104004
},
{
"dpo_loss": 0.6863933801651001,
"epoch": 1.89891355692017,
"grad_norm": 30.111613598581105,
"learning_rate": 3.5495655646982503e-07,
"logits": -1.1576950550079346,
"logps": -78.58309173583984,
"loss": 0.0755,
"objective": 0.07363765686750412,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.47083333134651184,
"regularize": 0.07363765686750412,
"step": 670,
"wo_beta": 16.09279441833496
},
{
"dpo_loss": 0.695208728313446,
"epoch": 1.9130845536136043,
"grad_norm": 27.241452477717303,
"learning_rate": 3.470648880310313e-07,
"logits": -1.1648114919662476,
"logps": -79.5347671508789,
"loss": 0.0735,
"objective": 0.07240771502256393,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5249999761581421,
"regularize": 0.07240771502256393,
"step": 675,
"wo_beta": 15.440892219543457
},
{
"dpo_loss": 0.6950518488883972,
"epoch": 1.9272555503070383,
"grad_norm": 25.198332305215366,
"learning_rate": 3.3921503052480236e-07,
"logits": -1.2177760601043701,
"logps": -81.27088165283203,
"loss": 0.0778,
"objective": 0.07866664230823517,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 0.07866664230823517,
"step": 680,
"wo_beta": 14.848203659057617
},
{
"dpo_loss": 0.6911803483963013,
"epoch": 1.9414265470004723,
"grad_norm": 26.10726119743999,
"learning_rate": 3.314091300237999e-07,
"logits": -1.1625895500183105,
"logps": -78.15774536132812,
"loss": 0.0738,
"objective": 0.07330299913883209,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4749999940395355,
"regularize": 0.07330299913883209,
"step": 685,
"wo_beta": 15.467205047607422
},
{
"dpo_loss": 0.6860196590423584,
"epoch": 1.9555975436939064,
"grad_norm": 26.093926175967837,
"learning_rate": 3.236493205832794e-07,
"logits": -1.21792733669281,
"logps": -79.12659454345703,
"loss": 0.071,
"objective": 0.07433832436800003,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5458333492279053,
"regularize": 0.07433832436800003,
"step": 690,
"wo_beta": 15.627902030944824
},
{
"dpo_loss": 0.6922653317451477,
"epoch": 1.9697685403873406,
"grad_norm": 26.38661425001647,
"learning_rate": 3.15937723657661e-07,
"logits": -1.1168206930160522,
"logps": -79.83128356933594,
"loss": 0.0723,
"objective": 0.06720028072595596,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.49166667461395264,
"regularize": 0.06720028072595596,
"step": 695,
"wo_beta": 16.023746490478516
},
{
"dpo_loss": 0.6856616139411926,
"epoch": 1.9839395370807746,
"grad_norm": 27.380948849082866,
"learning_rate": 3.082764475205442e-07,
"logits": -1.103851079940796,
"logps": -80.37809753417969,
"loss": 0.071,
"objective": 0.0717112347483635,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5166666507720947,
"regularize": 0.0717112347483635,
"step": 700,
"wo_beta": 14.657614707946777
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6979688405990601,
"eval_logits": -1.220837116241455,
"eval_logps": -85.24955749511719,
"eval_loss": 0.1420368105173111,
"eval_objective": 0.14046597480773926,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5253623127937317,
"eval_regularize": 0.14046597480773926,
"eval_runtime": 531.1966,
"eval_samples_per_second": 10.9,
"eval_steps_per_second": 0.909,
"eval_wo_beta": 15.610904693603516,
"step": 700
},
{
"dpo_loss": 0.6904897093772888,
"epoch": 1.9981105337742089,
"grad_norm": 27.03253447324609,
"learning_rate": 3.006675866883275e-07,
"logits": -1.0365864038467407,
"logps": -79.36177062988281,
"loss": 0.0704,
"objective": 0.07408583164215088,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.07408583164215088,
"step": 705,
"wo_beta": 16.758014678955078
},
{
"dpo_loss": 0.6849521398544312,
"epoch": 2.012281530467643,
"grad_norm": 24.956147004394822,
"learning_rate": 2.931132213475884e-07,
"logits": -1.1888701915740967,
"logps": -78.96455383300781,
"loss": 0.0619,
"objective": 0.06422288715839386,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.06422288715839386,
"step": 710,
"wo_beta": 15.760772705078125
},
{
"dpo_loss": 0.6961538791656494,
"epoch": 2.026452527161077,
"grad_norm": 27.279846270487834,
"learning_rate": 2.856154167863814e-07,
"logits": -1.1860238313674927,
"logps": -78.40641021728516,
"loss": 0.0631,
"objective": 0.06441039592027664,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.550000011920929,
"regularize": 0.06441039592027664,
"step": 715,
"wo_beta": 14.784539222717285
},
{
"dpo_loss": 0.6898289322853088,
"epoch": 2.040623523854511,
"grad_norm": 28.14233189102926,
"learning_rate": 2.7817622282960813e-07,
"logits": -1.1884685754776,
"logps": -79.12120819091797,
"loss": 0.0633,
"objective": 0.06231885775923729,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.06231885775923729,
"step": 720,
"wo_beta": 15.765007972717285
},
{
"dpo_loss": 0.6928841471672058,
"epoch": 2.0547945205479454,
"grad_norm": 25.78908501428665,
"learning_rate": 2.707976732786166e-07,
"logits": -1.1958059072494507,
"logps": -81.6028060913086,
"loss": 0.0578,
"objective": 0.06330116838216782,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.06330116838216782,
"step": 725,
"wo_beta": 13.992663383483887
},
{
"dpo_loss": 0.6905084252357483,
"epoch": 2.0689655172413794,
"grad_norm": 26.140030636203093,
"learning_rate": 2.6348178535517965e-07,
"logits": -1.2607707977294922,
"logps": -79.21609497070312,
"loss": 0.0598,
"objective": 0.05353347584605217,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.4958333373069763,
"regularize": 0.05353347584605217,
"step": 730,
"wo_beta": 15.095206260681152
},
{
"dpo_loss": 0.6901918053627014,
"epoch": 2.0831365139348135,
"grad_norm": 27.302640565922513,
"learning_rate": 2.5623055915000686e-07,
"logits": -1.1885894536972046,
"logps": -78.86723327636719,
"loss": 0.0579,
"objective": 0.05939151346683502,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4833333194255829,
"regularize": 0.05939151346683502,
"step": 735,
"wo_beta": 16.905290603637695
},
{
"dpo_loss": 0.6903797388076782,
"epoch": 2.0973075106282475,
"grad_norm": 25.718973789328345,
"learning_rate": 2.490459770759398e-07,
"logits": -1.2478386163711548,
"logps": -79.14292907714844,
"loss": 0.0573,
"objective": 0.05540405213832855,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.05540405213832855,
"step": 740,
"wo_beta": 15.3594331741333
},
{
"dpo_loss": 0.6952056288719177,
"epoch": 2.1114785073216815,
"grad_norm": 26.12282917762503,
"learning_rate": 2.419300033259798e-07,
"logits": -1.1640416383743286,
"logps": -79.09960174560547,
"loss": 0.0628,
"objective": 0.0631415918469429,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5541666746139526,
"regularize": 0.0631415918469429,
"step": 745,
"wo_beta": 14.359167098999023
},
{
"dpo_loss": 0.6888077259063721,
"epoch": 2.1256495040151155,
"grad_norm": 25.53259897003242,
"learning_rate": 2.3488458333629773e-07,
"logits": -1.2182810306549072,
"logps": -78.26011657714844,
"loss": 0.0546,
"objective": 0.05781084671616554,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.05781084671616554,
"step": 750,
"wo_beta": 15.271900177001953
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 0.6980140209197998,
"eval_logits": -1.2232871055603027,
"eval_logps": -85.46907806396484,
"eval_loss": 0.14231154322624207,
"eval_objective": 0.14071756601333618,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5258799195289612,
"eval_regularize": 0.14071756601333618,
"eval_runtime": 525.9214,
"eval_samples_per_second": 11.009,
"eval_steps_per_second": 0.918,
"eval_wo_beta": 15.648022651672363,
"step": 750
},
{
"dpo_loss": 0.6901395320892334,
"epoch": 2.13982050070855,
"grad_norm": 25.775133405076527,
"learning_rate": 2.2791164325437046e-07,
"logits": -1.2039532661437988,
"logps": -80.76856994628906,
"loss": 0.0536,
"objective": 0.054485421627759933,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5249999761581421,
"regularize": 0.054485421627759933,
"step": 755,
"wo_beta": 16.363035202026367
},
{
"dpo_loss": 0.6922858953475952,
"epoch": 2.153991497401984,
"grad_norm": 25.744794188993545,
"learning_rate": 2.21013089412392e-07,
"logits": -1.1505485773086548,
"logps": -77.95565795898438,
"loss": 0.0596,
"objective": 0.056366052478551865,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5,
"regularize": 0.056366052478551865,
"step": 760,
"wo_beta": 14.503907203674316
},
{
"dpo_loss": 0.6935012936592102,
"epoch": 2.168162494095418,
"grad_norm": 25.81314805277084,
"learning_rate": 2.1419080780610122e-07,
"logits": -1.195157527923584,
"logps": -79.0260009765625,
"loss": 0.0569,
"objective": 0.05813807621598244,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.05813807621598244,
"step": 765,
"wo_beta": 15.846463203430176
},
{
"dpo_loss": 0.6929753422737122,
"epoch": 2.182333490788852,
"grad_norm": 26.17366253681256,
"learning_rate": 2.0744666357916925e-07,
"logits": -1.2156563997268677,
"logps": -79.0594253540039,
"loss": 0.0599,
"objective": 0.06166267395019531,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5083333253860474,
"regularize": 0.06166267395019531,
"step": 770,
"wo_beta": 13.665863037109375
},
{
"dpo_loss": 0.6904846429824829,
"epoch": 2.196504487482286,
"grad_norm": 24.80909315966262,
"learning_rate": 2.0078250051328782e-07,
"logits": -1.20059072971344,
"logps": -79.86570739746094,
"loss": 0.0593,
"objective": 0.05707041174173355,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.574999988079071,
"regularize": 0.05707041174173355,
"step": 775,
"wo_beta": 16.53993797302246
},
{
"dpo_loss": 0.6920241117477417,
"epoch": 2.21067548417572,
"grad_norm": 26.21741329158667,
"learning_rate": 1.942001405240979e-07,
"logits": -1.1453113555908203,
"logps": -79.6847152709961,
"loss": 0.0544,
"objective": 0.05578133091330528,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.44583332538604736,
"ranking_simple": 0.4375,
"regularize": 0.05578133091330528,
"step": 780,
"wo_beta": 15.170312881469727
},
{
"dpo_loss": 0.6935942769050598,
"epoch": 2.2248464808691546,
"grad_norm": 28.321911906643972,
"learning_rate": 1.877013831630961e-07,
"logits": -1.1368038654327393,
"logps": -79.92477416992188,
"loss": 0.0563,
"objective": 0.0578266978263855,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4791666567325592,
"regularize": 0.0578266978263855,
"step": 785,
"wo_beta": 14.784603118896484
},
{
"dpo_loss": 0.6887209415435791,
"epoch": 2.2390174775625886,
"grad_norm": 25.1538491328267,
"learning_rate": 1.812880051256551e-07,
"logits": -1.1384888887405396,
"logps": -80.59889221191406,
"loss": 0.0504,
"objective": 0.04905276745557785,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.550000011920929,
"regularize": 0.04905276745557785,
"step": 790,
"wo_beta": 14.593072891235352
},
{
"dpo_loss": 0.6941591501235962,
"epoch": 2.2531884742560226,
"grad_norm": 25.628975208912717,
"learning_rate": 1.7496175976529337e-07,
"logits": -1.1934906244277954,
"logps": -81.73139953613281,
"loss": 0.053,
"objective": 0.05859142541885376,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5,
"regularize": 0.05859142541885376,
"step": 795,
"wo_beta": 13.79269790649414
},
{
"dpo_loss": 0.6919682621955872,
"epoch": 2.2673594709494567,
"grad_norm": 26.293732850411818,
"learning_rate": 1.6872437661432516e-07,
"logits": -1.2084691524505615,
"logps": -80.88973999023438,
"loss": 0.0531,
"objective": 0.05279294773936272,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.05279294773936272,
"step": 800,
"wo_beta": 14.340437889099121
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6981291174888611,
"eval_logits": -1.220612645149231,
"eval_logps": -86.13679504394531,
"eval_loss": 0.138593852519989,
"eval_objective": 0.13714565336704254,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5243270993232727,
"eval_regularize": 0.13714565336704254,
"eval_runtime": 503.3749,
"eval_samples_per_second": 11.502,
"eval_steps_per_second": 0.96,
"eval_wo_beta": 15.623366355895996,
"step": 800
},
{
"dpo_loss": 0.6895002126693726,
"epoch": 2.2815304676428907,
"grad_norm": 26.85852458075238,
"learning_rate": 1.62577560911024e-07,
"logits": -1.1975409984588623,
"logps": -79.75126647949219,
"loss": 0.0473,
"objective": 0.047933317720890045,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5166666507720947,
"regularize": 0.047933317720890045,
"step": 805,
"wo_beta": 15.455560684204102
},
{
"dpo_loss": 0.693041980266571,
"epoch": 2.295701464336325,
"grad_norm": 27.109828632522476,
"learning_rate": 1.565229931334277e-07,
"logits": -1.2860682010650635,
"logps": -79.39039611816406,
"loss": 0.051,
"objective": 0.04613161459565163,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5458333492279053,
"regularize": 0.04613161459565163,
"step": 810,
"wo_beta": 13.837719917297363
},
{
"dpo_loss": 0.6961421966552734,
"epoch": 2.309872461029759,
"grad_norm": 26.77232369418631,
"learning_rate": 1.5056232853991208e-07,
"logits": -1.2426903247833252,
"logps": -80.33802032470703,
"loss": 0.0483,
"objective": 0.04774492606520653,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4625000059604645,
"regularize": 0.04774492606520653,
"step": 815,
"wo_beta": 15.377904891967773
},
{
"dpo_loss": 0.6943568587303162,
"epoch": 2.324043457723193,
"grad_norm": 25.84415791966093,
"learning_rate": 1.4469719671666043e-07,
"logits": -1.1784952878952026,
"logps": -79.52135467529297,
"loss": 0.0497,
"objective": 0.0464615561068058,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5041666626930237,
"regularize": 0.0464615561068058,
"step": 820,
"wo_beta": 14.641592979431152
},
{
"dpo_loss": 0.6941722631454468,
"epoch": 2.3382144544166272,
"grad_norm": 26.057445300358456,
"learning_rate": 1.389292011321498e-07,
"logits": -1.1956678628921509,
"logps": -78.97592163085938,
"loss": 0.0489,
"objective": 0.04843177646398544,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5375000238418579,
"regularize": 0.04843177646398544,
"step": 825,
"wo_beta": 15.882107734680176
},
{
"dpo_loss": 0.6919335722923279,
"epoch": 2.3523854511100613,
"grad_norm": 25.587425832586177,
"learning_rate": 1.3325991869878012e-07,
"logits": -1.1966559886932373,
"logps": -81.00519561767578,
"loss": 0.0487,
"objective": 0.05618049576878548,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.05618049576878548,
"step": 830,
"wo_beta": 15.746501922607422
},
{
"dpo_loss": 0.6917215585708618,
"epoch": 2.3665564478034957,
"grad_norm": 25.756644403885232,
"learning_rate": 1.2769089934176126e-07,
"logits": -1.168601632118225,
"logps": -80.84972381591797,
"loss": 0.0488,
"objective": 0.052498627454042435,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.052498627454042435,
"step": 835,
"wo_beta": 14.608040809631348
},
{
"dpo_loss": 0.6898554563522339,
"epoch": 2.3807274444969297,
"grad_norm": 25.072094771225707,
"learning_rate": 1.222236655753791e-07,
"logits": -1.1249865293502808,
"logps": -80.45842742919922,
"loss": 0.0434,
"objective": 0.04277409613132477,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.4625000059604645,
"regularize": 0.04277409613132477,
"step": 840,
"wo_beta": 16.011308670043945
},
{
"dpo_loss": 0.6897058486938477,
"epoch": 2.3948984411903638,
"grad_norm": 31.2138593781791,
"learning_rate": 1.1685971208675538e-07,
"logits": -1.1826022863388062,
"logps": -81.36385345458984,
"loss": 0.0438,
"objective": 0.04376084357500076,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5333333611488342,
"regularize": 0.04376084357500076,
"step": 845,
"wo_beta": 15.694497108459473
},
{
"dpo_loss": 0.689830482006073,
"epoch": 2.409069437883798,
"grad_norm": 26.424193566129606,
"learning_rate": 1.1160050532721527e-07,
"logits": -1.2078933715820312,
"logps": -79.71755981445312,
"loss": 0.0444,
"objective": 0.04779530316591263,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.49166667461395264,
"regularize": 0.04779530316591263,
"step": 850,
"wo_beta": 15.619561195373535
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 0.6980399489402771,
"eval_logits": -1.2270959615707397,
"eval_logps": -86.03622436523438,
"eval_loss": 0.13948112726211548,
"eval_objective": 0.1381867229938507,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 0.1381867229938507,
"eval_runtime": 508.2715,
"eval_samples_per_second": 11.392,
"eval_steps_per_second": 0.95,
"eval_wo_beta": 15.647224426269531,
"step": 850
},
{
"dpo_loss": 0.690664529800415,
"epoch": 2.423240434577232,
"grad_norm": 26.086004792829357,
"learning_rate": 1.0644748311137375e-07,
"logits": -1.2208842039108276,
"logps": -79.23947143554688,
"loss": 0.0431,
"objective": 0.044093988835811615,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5249999761581421,
"regularize": 0.044093988835811615,
"step": 855,
"wo_beta": 14.724575996398926
},
{
"dpo_loss": 0.6878847479820251,
"epoch": 2.4374114312706663,
"grad_norm": 24.819758120044014,
"learning_rate": 1.0140205422405212e-07,
"logits": -1.172597050666809,
"logps": -80.47863006591797,
"loss": 0.0425,
"objective": 0.044025711715221405,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.044025711715221405,
"step": 860,
"wo_beta": 15.323599815368652
},
{
"dpo_loss": 0.6902381777763367,
"epoch": 2.4515824279641003,
"grad_norm": 27.313034441936136,
"learning_rate": 9.646559803512993e-08,
"logits": -1.2031606435775757,
"logps": -79.59320831298828,
"loss": 0.0444,
"objective": 0.04272008314728737,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.04272008314728737,
"step": 865,
"wo_beta": 15.875487327575684
},
{
"dpo_loss": 0.6910372376441956,
"epoch": 2.4657534246575343,
"grad_norm": 25.76666127477957,
"learning_rate": 9.163946412243895e-08,
"logits": -1.2454520463943481,
"logps": -80.33094024658203,
"loss": 0.0442,
"objective": 0.04635915905237198,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.04635915905237198,
"step": 870,
"wo_beta": 14.93254566192627
},
{
"dpo_loss": 0.6905195713043213,
"epoch": 2.4799244213509684,
"grad_norm": 25.65493367025704,
"learning_rate": 8.692497190280224e-08,
"logits": -1.193867802619934,
"logps": -79.73404693603516,
"loss": 0.044,
"objective": 0.04675581306219101,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.5625,
"regularize": 0.04675581306219101,
"step": 875,
"wo_beta": 16.489763259887695
},
{
"dpo_loss": 0.6905779242515564,
"epoch": 2.4940954180444024,
"grad_norm": 26.621663140091542,
"learning_rate": 8.232341027131883e-08,
"logits": -1.1066038608551025,
"logps": -79.80467224121094,
"loss": 0.0446,
"objective": 0.046583421528339386,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.046583421528339386,
"step": 880,
"wo_beta": 17.46852684020996
},
{
"dpo_loss": 0.6917292475700378,
"epoch": 2.5082664147378364,
"grad_norm": 24.02209120686893,
"learning_rate": 7.783603724899257e-08,
"logits": -1.25592041015625,
"logps": -79.1759262084961,
"loss": 0.0422,
"objective": 0.04294423386454582,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5249999761581421,
"regularize": 0.04294423386454582,
"step": 885,
"wo_beta": 16.415306091308594
},
{
"dpo_loss": 0.6880825161933899,
"epoch": 2.5224374114312704,
"grad_norm": 26.181840029139675,
"learning_rate": 7.346407963880136e-08,
"logits": -1.1791417598724365,
"logps": -78.21730041503906,
"loss": 0.0424,
"objective": 0.03773224726319313,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.03773224726319313,
"step": 890,
"wo_beta": 13.494309425354004
},
{
"dpo_loss": 0.692958414554596,
"epoch": 2.536608408124705,
"grad_norm": 27.615133075738825,
"learning_rate": 6.92087326903022e-08,
"logits": -1.175589680671692,
"logps": -80.6869888305664,
"loss": 0.0444,
"objective": 0.0476791188120842,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.48750001192092896,
"regularize": 0.0476791188120842,
"step": 895,
"wo_beta": 16.41474151611328
},
{
"dpo_loss": 0.6935379505157471,
"epoch": 2.550779404818139,
"grad_norm": 25.263999580012257,
"learning_rate": 6.507115977286143e-08,
"logits": -1.1382538080215454,
"logps": -79.20881652832031,
"loss": 0.0438,
"objective": 0.044265471398830414,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5208333134651184,
"regularize": 0.044265471398830414,
"step": 900,
"wo_beta": 15.096195220947266
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6975382566452026,
"eval_logits": -1.2295913696289062,
"eval_logps": -85.88396453857422,
"eval_loss": 0.13868437707424164,
"eval_objective": 0.13740767538547516,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 0.13740767538547516,
"eval_runtime": 525.8368,
"eval_samples_per_second": 11.011,
"eval_steps_per_second": 0.919,
"eval_wo_beta": 15.634546279907227,
"step": 900
},
{
"dpo_loss": 0.6917089819908142,
"epoch": 2.564950401511573,
"grad_norm": 25.44195334625603,
"learning_rate": 6.105249205760127e-08,
"logits": -1.2037063837051392,
"logps": -79.04875183105469,
"loss": 0.0411,
"objective": 0.03601410239934921,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.03601410239934921,
"step": 905,
"wo_beta": 14.861380577087402
},
{
"dpo_loss": 0.6932801008224487,
"epoch": 2.579121398205007,
"grad_norm": 26.495925146665332,
"learning_rate": 5.7153828208148846e-08,
"logits": -1.1827551126480103,
"logps": -81.922607421875,
"loss": 0.0424,
"objective": 0.04883956164121628,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.04883956164121628,
"step": 910,
"wo_beta": 15.852696418762207
},
{
"dpo_loss": 0.6898232102394104,
"epoch": 2.593292394898441,
"grad_norm": 25.88822340642525,
"learning_rate": 5.337623408027292e-08,
"logits": -1.2935634851455688,
"logps": -80.87789916992188,
"loss": 0.0403,
"objective": 0.040093984454870224,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 0.040093984454870224,
"step": 915,
"wo_beta": 14.905534744262695
},
{
"dpo_loss": 0.6920287013053894,
"epoch": 2.6074633915918755,
"grad_norm": 25.364010577767672,
"learning_rate": 4.972074243048896e-08,
"logits": -1.1468993425369263,
"logps": -79.89569854736328,
"loss": 0.0396,
"objective": 0.03967604413628578,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5249999761581421,
"regularize": 0.03967604413628578,
"step": 920,
"wo_beta": 15.246692657470703
},
{
"dpo_loss": 0.6928901076316833,
"epoch": 2.6216343882853095,
"grad_norm": 27.967184575096596,
"learning_rate": 4.6188352633713956e-08,
"logits": -1.1743673086166382,
"logps": -80.17101287841797,
"loss": 0.0417,
"objective": 0.04370425269007683,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.47083333134651184,
"regularize": 0.04370425269007683,
"step": 925,
"wo_beta": 16.336292266845703
},
{
"dpo_loss": 0.688522458076477,
"epoch": 2.6358053849787435,
"grad_norm": 26.578359144982873,
"learning_rate": 4.2780030410047796e-08,
"logits": -1.1617387533187866,
"logps": -79.97476196289062,
"loss": 0.0365,
"objective": 0.03662450239062309,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.44583332538604736,
"ranking_simple": 0.44583332538604736,
"regularize": 0.03662450239062309,
"step": 930,
"wo_beta": 16.801166534423828
},
{
"dpo_loss": 0.6928302645683289,
"epoch": 2.6499763816721775,
"grad_norm": 26.6756558913633,
"learning_rate": 3.949670756075446e-08,
"logits": -1.1548212766647339,
"logps": -78.78431701660156,
"loss": 0.0364,
"objective": 0.0356716513633728,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.574999988079071,
"regularize": 0.0356716513633728,
"step": 935,
"wo_beta": 15.733369827270508
},
{
"dpo_loss": 0.6884638071060181,
"epoch": 2.6641473783656116,
"grad_norm": 26.11837122854028,
"learning_rate": 3.63392817135173e-08,
"logits": -1.213140845298767,
"logps": -81.39899444580078,
"loss": 0.0357,
"objective": 0.03838236257433891,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5333333611488342,
"regularize": 0.03838236257433891,
"step": 940,
"wo_beta": 16.71453094482422
},
{
"dpo_loss": 0.6904810070991516,
"epoch": 2.678318375059046,
"grad_norm": 26.48243005501328,
"learning_rate": 3.330861607703611e-08,
"logits": -1.2477443218231201,
"logps": -80.07948303222656,
"loss": 0.0369,
"objective": 0.03517834097146988,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.48750001192092896,
"regularize": 0.03517834097146988,
"step": 945,
"wo_beta": 15.665254592895508
},
{
"dpo_loss": 0.6894643902778625,
"epoch": 2.69248937175248,
"grad_norm": 26.269248260275482,
"learning_rate": 3.040553920503502e-08,
"logits": -1.1376032829284668,
"logps": -80.89375305175781,
"loss": 0.0384,
"objective": 0.03873926401138306,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.03873926401138306,
"step": 950,
"wo_beta": 14.65186882019043
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 0.6974536180496216,
"eval_logits": -1.2285144329071045,
"eval_logps": -85.95899963378906,
"eval_loss": 0.13796193897724152,
"eval_objective": 0.13680347800254822,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.523809552192688,
"eval_regularize": 0.13680347800254822,
"eval_runtime": 502.396,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 0.961,
"eval_wo_beta": 15.642508506774902,
"step": 950
},
{
"dpo_loss": 0.6890572905540466,
"epoch": 2.706660368445914,
"grad_norm": 24.74397275822761,
"learning_rate": 2.7630844769743756e-08,
"logits": -1.2225416898727417,
"logps": -79.87822723388672,
"loss": 0.0403,
"objective": 0.04285174608230591,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.4749999940395355,
"regularize": 0.04285174608230591,
"step": 955,
"wo_beta": 13.80820369720459
},
{
"dpo_loss": 0.6908868551254272,
"epoch": 2.720831365139348,
"grad_norm": 25.907101929875015,
"learning_rate": 2.4985291344915673e-08,
"logits": -1.1964094638824463,
"logps": -79.958740234375,
"loss": 0.0384,
"objective": 0.03498096391558647,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5041666626930237,
"regularize": 0.03498096391558647,
"step": 960,
"wo_beta": 16.096843719482422
},
{
"dpo_loss": 0.6898122429847717,
"epoch": 2.735002361832782,
"grad_norm": 26.015895295989438,
"learning_rate": 2.2469602198441573e-08,
"logits": -1.2220391035079956,
"logps": -80.10702514648438,
"loss": 0.0368,
"objective": 0.03775167092680931,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.03775167092680931,
"step": 965,
"wo_beta": 14.61376953125
},
{
"dpo_loss": 0.6917709112167358,
"epoch": 2.7491733585262166,
"grad_norm": 24.33103792831753,
"learning_rate": 2.008446509461498e-08,
"logits": -1.2293510437011719,
"logps": -81.0619888305664,
"loss": 0.0341,
"objective": 0.03296136483550072,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5208333134651184,
"regularize": 0.03296136483550072,
"step": 970,
"wo_beta": 14.957200050354004
},
{
"dpo_loss": 0.6909447908401489,
"epoch": 2.7633443552196506,
"grad_norm": 24.892680282575437,
"learning_rate": 1.7830532106104746e-08,
"logits": -1.1391520500183105,
"logps": -79.50247955322266,
"loss": 0.0358,
"objective": 0.03571467101573944,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5416666865348816,
"regularize": 0.03571467101573944,
"step": 975,
"wo_beta": 15.747049331665039
},
{
"dpo_loss": 0.6906387209892273,
"epoch": 2.7775153519130846,
"grad_norm": 25.891776024282194,
"learning_rate": 1.570841943568446e-08,
"logits": -1.2599250078201294,
"logps": -78.82478332519531,
"loss": 0.0365,
"objective": 0.03682435303926468,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.49166667461395264,
"regularize": 0.03682435303926468,
"step": 980,
"wo_beta": 14.397340774536133
},
{
"dpo_loss": 0.6933729648590088,
"epoch": 2.7916863486065187,
"grad_norm": 24.71596998222205,
"learning_rate": 1.3718707247769134e-08,
"logits": -1.1248877048492432,
"logps": -77.72516632080078,
"loss": 0.038,
"objective": 0.03822270780801773,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.48750001192092896,
"regularize": 0.03822270780801773,
"step": 985,
"wo_beta": 14.327728271484375
},
{
"dpo_loss": 0.691889762878418,
"epoch": 2.8058573452999527,
"grad_norm": 26.185929406261582,
"learning_rate": 1.1861939509803686e-08,
"logits": -1.1771855354309082,
"logps": -81.14643859863281,
"loss": 0.0369,
"objective": 0.036898624151945114,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.47083333134651184,
"regularize": 0.036898624151945114,
"step": 990,
"wo_beta": 15.375889778137207
},
{
"dpo_loss": 0.6891864538192749,
"epoch": 2.820028341993387,
"grad_norm": 24.803225677825235,
"learning_rate": 1.0138623843548078e-08,
"logits": -1.2396986484527588,
"logps": -79.1412353515625,
"loss": 0.0365,
"objective": 0.04024568572640419,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5458333492279053,
"regularize": 0.04024568572640419,
"step": 995,
"wo_beta": 16.440141677856445
},
{
"dpo_loss": 0.6907335519790649,
"epoch": 2.8341993386868207,
"grad_norm": 24.80804716491088,
"learning_rate": 8.54923138629815e-09,
"logits": -1.1814649105072021,
"logps": -78.3318862915039,
"loss": 0.0375,
"objective": 0.03398551046848297,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5208333134651184,
"regularize": 0.03398551046848297,
"step": 1000,
"wo_beta": 14.515811920166016
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6973779201507568,
"eval_logits": -1.2304595708847046,
"eval_logps": -85.99760437011719,
"eval_loss": 0.1379886120557785,
"eval_objective": 0.1368565410375595,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5243270993232727,
"eval_regularize": 0.1368565410375595,
"eval_runtime": 504.9482,
"eval_samples_per_second": 11.467,
"eval_steps_per_second": 0.957,
"eval_wo_beta": 15.63548755645752,
"step": 1000
},
{
"dpo_loss": 0.6911761164665222,
"epoch": 2.848370335380255,
"grad_norm": 27.32667601221845,
"learning_rate": 7.09419666208183e-09,
"logits": -1.1803662776947021,
"logps": -78.7650375366211,
"loss": 0.036,
"objective": 0.03725501522421837,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5208333134651184,
"regularize": 0.03725501522421837,
"step": 1005,
"wo_beta": 15.2937593460083
},
{
"dpo_loss": 0.6888595223426819,
"epoch": 2.862541332073689,
"grad_norm": 26.14400831689978,
"learning_rate": 5.773917462864264e-09,
"logits": -1.2407745122909546,
"logps": -79.07453918457031,
"loss": 0.0359,
"objective": 0.03689141198992729,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5291666388511658,
"regularize": 0.03689141198992729,
"step": 1010,
"wo_beta": 15.180621147155762
},
{
"dpo_loss": 0.6912004947662354,
"epoch": 2.8767123287671232,
"grad_norm": 24.9602315307722,
"learning_rate": 4.588754739795586e-09,
"logits": -1.1721571683883667,
"logps": -78.31599426269531,
"loss": 0.0354,
"objective": 0.03823023661971092,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.550000011920929,
"regularize": 0.03823023661971092,
"step": 1015,
"wo_beta": 14.313817977905273
},
{
"dpo_loss": 0.6896302700042725,
"epoch": 2.8908833254605573,
"grad_norm": 24.85258883289883,
"learning_rate": 3.53903250453047e-09,
"logits": -1.1410295963287354,
"logps": -80.05741882324219,
"loss": 0.0343,
"objective": 0.03470051661133766,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5166666507720947,
"regularize": 0.03470051661133766,
"step": 1020,
"wo_beta": 17.722339630126953
},
{
"dpo_loss": 0.6912213563919067,
"epoch": 2.9050543221539913,
"grad_norm": 25.437671735836517,
"learning_rate": 2.6250377406467627e-09,
"logits": -1.2291027307510376,
"logps": -80.00859832763672,
"loss": 0.0379,
"objective": 0.037315838038921356,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5041666626930237,
"regularize": 0.037315838038921356,
"step": 1025,
"wo_beta": 14.656061172485352
},
{
"dpo_loss": 0.6911433935165405,
"epoch": 2.9192253188474258,
"grad_norm": 24.681518212372314,
"learning_rate": 1.8470203251865768e-09,
"logits": -1.2523103952407837,
"logps": -80.20305633544922,
"loss": 0.035,
"objective": 0.03597547858953476,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4791666567325592,
"regularize": 0.03597547858953476,
"step": 1030,
"wo_beta": 16.243247985839844
},
{
"dpo_loss": 0.6904833316802979,
"epoch": 2.9333963155408598,
"grad_norm": 26.808499612926756,
"learning_rate": 1.2051929603428823e-09,
"logits": -1.2276477813720703,
"logps": -80.6124496459961,
"loss": 0.0344,
"objective": 0.03077917918562889,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5041666626930237,
"regularize": 0.03077917918562889,
"step": 1035,
"wo_beta": 14.297567367553711
},
{
"dpo_loss": 0.6902684569358826,
"epoch": 2.947567312234294,
"grad_norm": 24.71043561481991,
"learning_rate": 6.997311153086882e-10,
"logits": -1.227773904800415,
"logps": -80.38175201416016,
"loss": 0.0364,
"objective": 0.036134228110313416,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.036134228110313416,
"step": 1040,
"wo_beta": 16.110403060913086
},
{
"dpo_loss": 0.6894943118095398,
"epoch": 2.961738308927728,
"grad_norm": 26.305013618654215,
"learning_rate": 3.3077297830541585e-10,
"logits": -1.1821495294570923,
"logps": -81.93363189697266,
"loss": 0.0371,
"objective": 0.04041092470288277,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5541666746139526,
"regularize": 0.04041092470288277,
"step": 1045,
"wo_beta": 17.30424690246582
},
{
"dpo_loss": 0.6926708221435547,
"epoch": 2.975909305621162,
"grad_norm": 27.660126015515125,
"learning_rate": 9.841941880361914e-11,
"logits": -1.2283350229263306,
"logps": -78.42631530761719,
"loss": 0.0397,
"objective": 0.03637199103832245,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.550000011920929,
"regularize": 0.03637199103832245,
"step": 1050,
"wo_beta": 14.132574081420898
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 0.697369396686554,
"eval_logits": -1.230570673942566,
"eval_logps": -85.98023223876953,
"eval_loss": 0.13814175128936768,
"eval_objective": 0.13700547814369202,
"eval_ranking_idealized": 0.6024844646453857,
"eval_ranking_idealized_expo": 0.5232919454574585,
"eval_ranking_simple": 0.5243270993232727,
"eval_regularize": 0.13700547814369202,
"eval_runtime": 530.5394,
"eval_samples_per_second": 10.913,
"eval_steps_per_second": 0.91,
"eval_wo_beta": 15.63470458984375,
"step": 1050
},
{
"dpo_loss": 0.689972996711731,
"epoch": 2.9900803023145963,
"grad_norm": 25.71242634224602,
"learning_rate": 2.7339599464326622e-12,
"logits": -1.2016465663909912,
"logps": -79.08844757080078,
"loss": 0.0389,
"objective": 0.03705615550279617,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5541666746139526,
"regularize": 0.03705615550279617,
"step": 1055,
"wo_beta": 14.549761772155762
},
{
"epoch": 2.992914501653283,
"step": 1056,
"total_flos": 0.0,
"train_loss": 0.08480868444806247,
"train_runtime": 47353.1169,
"train_samples_per_second": 3.218,
"train_steps_per_second": 0.022
}
],
"logging_steps": 5,
"max_steps": 1056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}