qwen2.5-0.5b-expo-DPO-noES-0.1 / trainer_state.json
hZzy's picture
Model save
3678860 verified
raw
history blame
106 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.992914501653283,
"eval_steps": 50,
"global_step": 1056,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 18.39703870091124,
"learning_rate": 9.433962264150943e-09,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 0.6931,
"objective": 0.6931471824645996,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6931471824645996,
"step": 1,
"wo_beta": 5.2708282470703125
},
{
"dpo_loss": 0.6930365562438965,
"epoch": 0.014170996693434105,
"grad_norm": 18.759209504732585,
"learning_rate": 4.7169811320754715e-08,
"logits": -1.429247498512268,
"logps": -83.84073638916016,
"loss": 0.6931,
"objective": 0.6930365562438965,
"ranking_simple": 0.4895833432674408,
"regularize": 0.6930365562438965,
"step": 5,
"wo_beta": 7.670312404632568
},
{
"dpo_loss": 0.6933786869049072,
"epoch": 0.02834199338686821,
"grad_norm": 20.439115048712264,
"learning_rate": 9.433962264150943e-08,
"logits": -1.40127694606781,
"logps": -84.69605255126953,
"loss": 0.6932,
"objective": 0.6933786869049072,
"ranking_simple": 0.5791666507720947,
"regularize": 0.6933786869049072,
"step": 10,
"wo_beta": 5.937962055206299
},
{
"dpo_loss": 0.6909440755844116,
"epoch": 0.042512990080302314,
"grad_norm": 18.777743501771415,
"learning_rate": 1.4150943396226414e-07,
"logits": -1.5383602380752563,
"logps": -84.21109771728516,
"loss": 0.6921,
"objective": 0.6909440755844116,
"ranking_simple": 0.5666666626930237,
"regularize": 0.6909440755844116,
"step": 15,
"wo_beta": 6.64866828918457
},
{
"dpo_loss": 0.6903365254402161,
"epoch": 0.05668398677373642,
"grad_norm": 18.03474083688221,
"learning_rate": 1.8867924528301886e-07,
"logits": -1.3631871938705444,
"logps": -83.19156646728516,
"loss": 0.6904,
"objective": 0.6903365254402161,
"ranking_simple": 0.48750001192092896,
"regularize": 0.6903365254402161,
"step": 20,
"wo_beta": 8.294000625610352
},
{
"dpo_loss": 0.6920856237411499,
"epoch": 0.07085498346717052,
"grad_norm": 17.418659582335064,
"learning_rate": 2.3584905660377358e-07,
"logits": -1.3736237287521362,
"logps": -83.09373474121094,
"loss": 0.6904,
"objective": 0.6920856237411499,
"ranking_simple": 0.5666666626930237,
"regularize": 0.6920856237411499,
"step": 25,
"wo_beta": 6.352960109710693
},
{
"dpo_loss": 0.686029851436615,
"epoch": 0.08502598016060463,
"grad_norm": 16.102325248569215,
"learning_rate": 2.830188679245283e-07,
"logits": -1.4392812252044678,
"logps": -82.93743133544922,
"loss": 0.6866,
"objective": 0.686029851436615,
"ranking_simple": 0.48750001192092896,
"regularize": 0.686029851436615,
"step": 30,
"wo_beta": 8.934405326843262
},
{
"dpo_loss": 0.692076563835144,
"epoch": 0.09919697685403873,
"grad_norm": 17.863783183076404,
"learning_rate": 3.30188679245283e-07,
"logits": -1.4204154014587402,
"logps": -82.21839904785156,
"loss": 0.6871,
"objective": 0.692076563835144,
"ranking_simple": 0.5333333611488342,
"regularize": 0.692076563835144,
"step": 35,
"wo_beta": 6.40147590637207
},
{
"dpo_loss": 0.68491530418396,
"epoch": 0.11336797354747284,
"grad_norm": 19.094434967317397,
"learning_rate": 3.773584905660377e-07,
"logits": -1.4201196432113647,
"logps": -82.76591491699219,
"loss": 0.6809,
"objective": 0.68491530418396,
"ranking_simple": 0.5166666507720947,
"regularize": 0.68491530418396,
"step": 40,
"wo_beta": 6.802278518676758
},
{
"dpo_loss": 0.681048572063446,
"epoch": 0.12753897024090693,
"grad_norm": 19.32762735250478,
"learning_rate": 4.2452830188679244e-07,
"logits": -1.448870301246643,
"logps": -82.9134292602539,
"loss": 0.6834,
"objective": 0.681048572063446,
"ranking_simple": 0.5249999761581421,
"regularize": 0.681048572063446,
"step": 45,
"wo_beta": 6.972365856170654
},
{
"dpo_loss": 0.676356852054596,
"epoch": 0.14170996693434104,
"grad_norm": 18.06878353401125,
"learning_rate": 4.7169811320754717e-07,
"logits": -1.4718233346939087,
"logps": -83.58888244628906,
"loss": 0.6719,
"objective": 0.676356852054596,
"ranking_simple": 0.4958333373069763,
"regularize": 0.676356852054596,
"step": 50,
"wo_beta": 7.712605953216553
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6879124045372009,
"eval_logits": -1.4697412252426147,
"eval_logps": -89.67761993408203,
"eval_loss": 0.685627281665802,
"eval_objective": 0.6879124045372009,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 0.6879124045372009,
"eval_runtime": 368.3427,
"eval_samples_per_second": 15.719,
"eval_steps_per_second": 1.311,
"eval_wo_beta": 7.922134876251221,
"step": 50
},
{
"dpo_loss": 0.6702864766120911,
"epoch": 0.15588096362777515,
"grad_norm": 19.335503669780774,
"learning_rate": 5.188679245283019e-07,
"logits": -1.4462206363677979,
"logps": -83.2486801147461,
"loss": 0.6744,
"objective": 0.6702864766120911,
"ranking_simple": 0.4958333373069763,
"regularize": 0.6702864766120911,
"step": 55,
"wo_beta": 7.6053338050842285
},
{
"dpo_loss": 0.6743167638778687,
"epoch": 0.17005196032120926,
"grad_norm": 19.32608740622528,
"learning_rate": 5.660377358490566e-07,
"logits": -1.3781672716140747,
"logps": -84.30441284179688,
"loss": 0.6703,
"objective": 0.6743167638778687,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6743167638778687,
"step": 60,
"wo_beta": 6.470597267150879
},
{
"dpo_loss": 0.6568139791488647,
"epoch": 0.18422295701464336,
"grad_norm": 21.002392018495687,
"learning_rate": 6.132075471698112e-07,
"logits": -1.4774647951126099,
"logps": -81.55781555175781,
"loss": 0.6619,
"objective": 0.6568139791488647,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6568139791488647,
"step": 65,
"wo_beta": 6.502650260925293
},
{
"dpo_loss": 0.6504150629043579,
"epoch": 0.19839395370807747,
"grad_norm": 18.63585310886347,
"learning_rate": 6.60377358490566e-07,
"logits": -1.5209298133850098,
"logps": -84.6080551147461,
"loss": 0.6501,
"objective": 0.6504150629043579,
"ranking_simple": 0.5625,
"regularize": 0.6504150629043579,
"step": 70,
"wo_beta": 6.957874774932861
},
{
"dpo_loss": 0.653372049331665,
"epoch": 0.21256495040151158,
"grad_norm": 20.115959212821757,
"learning_rate": 7.075471698113207e-07,
"logits": -1.4559004306793213,
"logps": -85.126953125,
"loss": 0.6557,
"objective": 0.653372049331665,
"ranking_simple": 0.5458333492279053,
"regularize": 0.653372049331665,
"step": 75,
"wo_beta": 6.480815410614014
},
{
"dpo_loss": 0.6524909734725952,
"epoch": 0.22673594709494568,
"grad_norm": 23.881541586489003,
"learning_rate": 7.547169811320754e-07,
"logits": -1.567487120628357,
"logps": -84.74055480957031,
"loss": 0.6601,
"objective": 0.6524909734725952,
"ranking_simple": 0.5666666626930237,
"regularize": 0.6524909734725952,
"step": 80,
"wo_beta": 6.057122707366943
},
{
"dpo_loss": 0.6443823575973511,
"epoch": 0.2409069437883798,
"grad_norm": 18.676003369955335,
"learning_rate": 8.018867924528302e-07,
"logits": -1.5786373615264893,
"logps": -84.9271011352539,
"loss": 0.6489,
"objective": 0.6443823575973511,
"ranking_simple": 0.5666666626930237,
"regularize": 0.6443823575973511,
"step": 85,
"wo_beta": 5.872949600219727
},
{
"dpo_loss": 0.6308037638664246,
"epoch": 0.25507794048181387,
"grad_norm": 18.36712876918143,
"learning_rate": 8.490566037735849e-07,
"logits": -1.693374752998352,
"logps": -85.01197052001953,
"loss": 0.6429,
"objective": 0.6308037638664246,
"ranking_simple": 0.5458333492279053,
"regularize": 0.6308037638664246,
"step": 90,
"wo_beta": 6.442193031311035
},
{
"dpo_loss": 0.6293771266937256,
"epoch": 0.269248937175248,
"grad_norm": 19.143294691787162,
"learning_rate": 8.962264150943396e-07,
"logits": -1.6244534254074097,
"logps": -84.94630432128906,
"loss": 0.6392,
"objective": 0.6293771266937256,
"ranking_simple": 0.5208333134651184,
"regularize": 0.6293771266937256,
"step": 95,
"wo_beta": 7.510855197906494
},
{
"dpo_loss": 0.6378400921821594,
"epoch": 0.2834199338686821,
"grad_norm": 18.923136932397636,
"learning_rate": 9.433962264150943e-07,
"logits": -1.531855821609497,
"logps": -84.18727111816406,
"loss": 0.6459,
"objective": 0.6378400921821594,
"ranking_simple": 0.5291666388511658,
"regularize": 0.6378400921821594,
"step": 100,
"wo_beta": 6.881345748901367
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6793138384819031,
"eval_logits": -1.6510542631149292,
"eval_logps": -92.99544525146484,
"eval_loss": 0.6764773726463318,
"eval_objective": 0.6793138384819031,
"eval_ranking_simple": 0.534679114818573,
"eval_regularize": 0.6793138384819031,
"eval_runtime": 367.8383,
"eval_samples_per_second": 15.741,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 7.872693061828613,
"step": 100
},
{
"dpo_loss": 0.6514045000076294,
"epoch": 0.2975909305621162,
"grad_norm": 24.32830193794003,
"learning_rate": 9.90566037735849e-07,
"logits": -1.7460098266601562,
"logps": -88.28630828857422,
"loss": 0.6307,
"objective": 0.6514045000076294,
"ranking_simple": 0.574999988079071,
"regularize": 0.6514045000076294,
"step": 105,
"wo_beta": 6.015078067779541
},
{
"dpo_loss": 0.6186487674713135,
"epoch": 0.3117619272555503,
"grad_norm": 21.425599904477778,
"learning_rate": 9.99956257238817e-07,
"logits": -1.6647683382034302,
"logps": -88.7564468383789,
"loss": 0.619,
"objective": 0.6186487674713135,
"ranking_simple": 0.5249999761581421,
"regularize": 0.6186487674713135,
"step": 110,
"wo_beta": 6.501535892486572
},
{
"dpo_loss": 0.6382968425750732,
"epoch": 0.32593292394898443,
"grad_norm": 23.57030350866866,
"learning_rate": 9.997785653888834e-07,
"logits": -1.6553956270217896,
"logps": -88.90452575683594,
"loss": 0.6244,
"objective": 0.6382968425750732,
"ranking_simple": 0.5666666626930237,
"regularize": 0.6382968425750732,
"step": 115,
"wo_beta": 6.804749488830566
},
{
"dpo_loss": 0.613271176815033,
"epoch": 0.3401039206424185,
"grad_norm": 17.498249311505813,
"learning_rate": 9.994642390694308e-07,
"logits": -1.6392256021499634,
"logps": -87.69567108154297,
"loss": 0.6212,
"objective": 0.613271176815033,
"ranking_simple": 0.574999988079071,
"regularize": 0.613271176815033,
"step": 120,
"wo_beta": 7.195651054382324
},
{
"dpo_loss": 0.6102784872055054,
"epoch": 0.35427491733585265,
"grad_norm": 17.38350046383242,
"learning_rate": 9.990133642141357e-07,
"logits": -1.6804019212722778,
"logps": -89.06767272949219,
"loss": 0.6218,
"objective": 0.6102784872055054,
"ranking_simple": 0.5208333134651184,
"regularize": 0.6102784872055054,
"step": 125,
"wo_beta": 7.266691207885742
},
{
"dpo_loss": 0.6158983111381531,
"epoch": 0.3684459140292867,
"grad_norm": 18.419918693556507,
"learning_rate": 9.98426064087682e-07,
"logits": -1.6602026224136353,
"logps": -86.5518569946289,
"loss": 0.6238,
"objective": 0.6158983111381531,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6158983111381531,
"step": 130,
"wo_beta": 5.715666770935059
},
{
"dpo_loss": 0.6510148644447327,
"epoch": 0.3826169107227208,
"grad_norm": 21.943398704973582,
"learning_rate": 9.977024992520601e-07,
"logits": -1.7066783905029297,
"logps": -88.5932846069336,
"loss": 0.6289,
"objective": 0.6510148644447327,
"ranking_simple": 0.5166666507720947,
"regularize": 0.6510148644447327,
"step": 135,
"wo_beta": 7.166342258453369
},
{
"dpo_loss": 0.6169579029083252,
"epoch": 0.39678790741615494,
"grad_norm": 19.9956879065625,
"learning_rate": 9.968428675226713e-07,
"logits": -1.6791799068450928,
"logps": -89.6558837890625,
"loss": 0.6144,
"objective": 0.6169579029083252,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6169579029083252,
"step": 140,
"wo_beta": 6.211281776428223
},
{
"dpo_loss": 0.5698094964027405,
"epoch": 0.410958904109589,
"grad_norm": 20.71491947708098,
"learning_rate": 9.958474039142469e-07,
"logits": -1.6847442388534546,
"logps": -89.56360626220703,
"loss": 0.5875,
"objective": 0.5698094964027405,
"ranking_simple": 0.6499999761581421,
"regularize": 0.5698094964027405,
"step": 145,
"wo_beta": 5.130297660827637
},
{
"dpo_loss": 0.6153029203414917,
"epoch": 0.42512990080302315,
"grad_norm": 19.57556543746145,
"learning_rate": 9.947163805765979e-07,
"logits": -1.70354425907135,
"logps": -89.73717498779297,
"loss": 0.5993,
"objective": 0.6153029203414917,
"ranking_simple": 0.550000011920929,
"regularize": 0.6153029203414917,
"step": 150,
"wo_beta": 8.4924955368042
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6804503202438354,
"eval_logits": -1.6963125467300415,
"eval_logps": -95.27294921875,
"eval_loss": 0.6770597696304321,
"eval_objective": 0.6804503202438354,
"eval_ranking_simple": 0.534679114818573,
"eval_regularize": 0.6804503202438354,
"eval_runtime": 370.2983,
"eval_samples_per_second": 15.636,
"eval_steps_per_second": 1.304,
"eval_wo_beta": 8.2155179977417,
"step": 150
},
{
"dpo_loss": 0.59377521276474,
"epoch": 0.43930089749645723,
"grad_norm": 23.61683273964934,
"learning_rate": 9.934501067202117e-07,
"logits": -1.7478511333465576,
"logps": -87.8529052734375,
"loss": 0.5935,
"objective": 0.59377521276474,
"ranking_simple": 0.5375000238418579,
"regularize": 0.59377521276474,
"step": 155,
"wo_beta": 7.3897318840026855
},
{
"dpo_loss": 0.589878261089325,
"epoch": 0.45347189418989137,
"grad_norm": 19.912032474590337,
"learning_rate": 9.92048928531717e-07,
"logits": -1.6176892518997192,
"logps": -88.71855926513672,
"loss": 0.5812,
"objective": 0.589878261089325,
"ranking_simple": 0.5874999761581421,
"regularize": 0.589878261089325,
"step": 160,
"wo_beta": 6.206328868865967
},
{
"dpo_loss": 0.5920292735099792,
"epoch": 0.46764289088332545,
"grad_norm": 21.374874744599914,
"learning_rate": 9.905132290792392e-07,
"logits": -1.745171308517456,
"logps": -91.21366119384766,
"loss": 0.5859,
"objective": 0.5920292735099792,
"ranking_simple": 0.5416666865348816,
"regularize": 0.5920292735099792,
"step": 165,
"wo_beta": 6.208406448364258
},
{
"dpo_loss": 0.6067599654197693,
"epoch": 0.4818138875767596,
"grad_norm": 19.998053262503415,
"learning_rate": 9.888434282076757e-07,
"logits": -1.8083672523498535,
"logps": -91.927734375,
"loss": 0.5862,
"objective": 0.6067599654197693,
"ranking_simple": 0.5375000238418579,
"regularize": 0.6067599654197693,
"step": 170,
"wo_beta": 7.094420433044434
},
{
"dpo_loss": 0.5829775929450989,
"epoch": 0.49598488427019366,
"grad_norm": 22.779470361992754,
"learning_rate": 9.870399824239114e-07,
"logits": -1.6424000263214111,
"logps": -91.81340789794922,
"loss": 0.5844,
"objective": 0.5829775929450989,
"ranking_simple": 0.5708333253860474,
"regularize": 0.5829775929450989,
"step": 175,
"wo_beta": 6.69417667388916
},
{
"dpo_loss": 0.5724970698356628,
"epoch": 0.5101558809636277,
"grad_norm": 19.56386680488567,
"learning_rate": 9.851033847720164e-07,
"logits": -1.6553891897201538,
"logps": -91.84220123291016,
"loss": 0.5461,
"objective": 0.5724970698356628,
"ranking_simple": 0.5375000238418579,
"regularize": 0.5724970698356628,
"step": 180,
"wo_beta": 6.825948238372803
},
{
"dpo_loss": 0.5537912845611572,
"epoch": 0.5243268776570619,
"grad_norm": 23.895057754013074,
"learning_rate": 9.83034164698452e-07,
"logits": -1.6311272382736206,
"logps": -90.77505493164062,
"loss": 0.5628,
"objective": 0.5537912845611572,
"ranking_simple": 0.5791666507720947,
"regularize": 0.5537912845611572,
"step": 185,
"wo_beta": 5.920953750610352
},
{
"dpo_loss": 0.5373654961585999,
"epoch": 0.538497874350496,
"grad_norm": 23.921385246249166,
"learning_rate": 9.808328879073251e-07,
"logits": -1.727196216583252,
"logps": -92.44843292236328,
"loss": 0.5475,
"objective": 0.5373654961585999,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5373654961585999,
"step": 190,
"wo_beta": 5.952234745025635
},
{
"dpo_loss": 0.5564671754837036,
"epoch": 0.5526688710439301,
"grad_norm": 24.71069656183637,
"learning_rate": 9.78500156205731e-07,
"logits": -1.7691571712493896,
"logps": -102.49636840820312,
"loss": 0.5838,
"objective": 0.5564671754837036,
"ranking_simple": 0.6041666865348816,
"regularize": 0.5564671754837036,
"step": 195,
"wo_beta": 6.274513244628906
},
{
"dpo_loss": 0.5659002661705017,
"epoch": 0.5668398677373642,
"grad_norm": 18.721765242615913,
"learning_rate": 9.760366073392244e-07,
"logits": -1.7705143690109253,
"logps": -108.12271118164062,
"loss": 0.5557,
"objective": 0.5659002661705017,
"ranking_simple": 0.5583333373069763,
"regularize": 0.5659002661705017,
"step": 200,
"wo_beta": 6.690428733825684
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6866137981414795,
"eval_logits": -1.8150068521499634,
"eval_logps": -115.46802520751953,
"eval_loss": 0.6857941150665283,
"eval_objective": 0.6866137981414795,
"eval_ranking_simple": 0.5295031070709229,
"eval_regularize": 0.6866137981414795,
"eval_runtime": 369.0235,
"eval_samples_per_second": 15.69,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 7.96071720123291,
"step": 200
},
{
"dpo_loss": 0.5478584170341492,
"epoch": 0.5810108644307983,
"grad_norm": 21.07102836538459,
"learning_rate": 9.734429148174674e-07,
"logits": -1.6568210124969482,
"logps": -108.47331237792969,
"loss": 0.545,
"objective": 0.5478584170341492,
"ranking_simple": 0.5874999761581421,
"regularize": 0.5478584170341492,
"step": 205,
"wo_beta": 7.015623092651367
},
{
"dpo_loss": 0.5228937268257141,
"epoch": 0.5951818611242324,
"grad_norm": 20.651776585733355,
"learning_rate": 9.707197877300973e-07,
"logits": -1.7239099740982056,
"logps": -108.58413696289062,
"loss": 0.5299,
"objective": 0.5228937268257141,
"ranking_simple": 0.625,
"regularize": 0.5228937268257141,
"step": 210,
"wo_beta": 5.675539970397949
},
{
"dpo_loss": 0.5450037121772766,
"epoch": 0.6093528578176665,
"grad_norm": 23.006204591176342,
"learning_rate": 9.678679705528698e-07,
"logits": -1.818426251411438,
"logps": -108.85588073730469,
"loss": 0.5504,
"objective": 0.5450037121772766,
"ranking_simple": 0.5625,
"regularize": 0.5450037121772766,
"step": 215,
"wo_beta": 6.871977806091309
},
{
"dpo_loss": 0.5016953945159912,
"epoch": 0.6235238545111006,
"grad_norm": 21.55242582954206,
"learning_rate": 9.648882429441256e-07,
"logits": -1.796720266342163,
"logps": -106.0498046875,
"loss": 0.5337,
"objective": 0.5016953945159912,
"ranking_simple": 0.5958333611488342,
"regularize": 0.5016953945159912,
"step": 220,
"wo_beta": 6.975856304168701
},
{
"dpo_loss": 0.5535920262336731,
"epoch": 0.6376948512045347,
"grad_norm": 18.336769425586553,
"learning_rate": 9.61781419531641e-07,
"logits": -1.8808425664901733,
"logps": -103.36882781982422,
"loss": 0.5444,
"objective": 0.5535920262336731,
"ranking_simple": 0.5833333134651184,
"regularize": 0.5535920262336731,
"step": 225,
"wo_beta": 6.945814609527588
},
{
"dpo_loss": 0.5067029595375061,
"epoch": 0.6518658478979689,
"grad_norm": 20.395537847593562,
"learning_rate": 9.585483496899149e-07,
"logits": -1.781941533088684,
"logps": -102.78409576416016,
"loss": 0.5275,
"objective": 0.5067029595375061,
"ranking_simple": 0.6541666388511658,
"regularize": 0.5067029595375061,
"step": 230,
"wo_beta": 4.95630407333374
},
{
"dpo_loss": 0.5263319611549377,
"epoch": 0.6660368445914029,
"grad_norm": 24.579908080459226,
"learning_rate": 9.551899173079606e-07,
"logits": -1.7018815279006958,
"logps": -107.19641876220703,
"loss": 0.5235,
"objective": 0.5263319611549377,
"ranking_simple": 0.5958333611488342,
"regularize": 0.5263319611549377,
"step": 235,
"wo_beta": 6.136026859283447
},
{
"dpo_loss": 0.5419639945030212,
"epoch": 0.680207841284837,
"grad_norm": 25.457069150013837,
"learning_rate": 9.517070405476574e-07,
"logits": -1.8716365098953247,
"logps": -109.35581970214844,
"loss": 0.5391,
"objective": 0.5419639945030212,
"ranking_simple": 0.637499988079071,
"regularize": 0.5419639945030212,
"step": 240,
"wo_beta": 7.386639595031738
},
{
"dpo_loss": 0.599401593208313,
"epoch": 0.6943788379782712,
"grad_norm": 20.952521291073488,
"learning_rate": 9.481006715927351e-07,
"logits": -1.8737353086471558,
"logps": -105.67475128173828,
"loss": 0.5586,
"objective": 0.599401593208313,
"ranking_simple": 0.625,
"regularize": 0.599401593208313,
"step": 245,
"wo_beta": 6.23760986328125
},
{
"dpo_loss": 0.5379212498664856,
"epoch": 0.7085498346717053,
"grad_norm": 17.076599406497994,
"learning_rate": 9.443717963884568e-07,
"logits": -1.6231579780578613,
"logps": -97.79161834716797,
"loss": 0.5428,
"objective": 0.5379212498664856,
"ranking_simple": 0.6083333492279053,
"regularize": 0.5379212498664856,
"step": 250,
"wo_beta": 5.470564842224121
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.674086332321167,
"eval_logits": -1.8494712114334106,
"eval_logps": -102.56678771972656,
"eval_loss": 0.6744823455810547,
"eval_objective": 0.674086332321167,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 0.674086332321167,
"eval_runtime": 369.2355,
"eval_samples_per_second": 15.681,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 7.989133358001709,
"step": 250
},
{
"dpo_loss": 0.4980238378047943,
"epoch": 0.7227208313651393,
"grad_norm": 18.645503031645394,
"learning_rate": 9.405214343720706e-07,
"logits": -1.8677008152008057,
"logps": -94.05529022216797,
"loss": 0.5176,
"objective": 0.4980238378047943,
"ranking_simple": 0.5833333134651184,
"regularize": 0.4980238378047943,
"step": 255,
"wo_beta": 5.837521553039551
},
{
"dpo_loss": 0.5208728909492493,
"epoch": 0.7368918280585735,
"grad_norm": 22.040310265005534,
"learning_rate": 9.365506381941065e-07,
"logits": -1.8145065307617188,
"logps": -96.94599914550781,
"loss": 0.5381,
"objective": 0.5208728909492493,
"ranking_simple": 0.5708333253860474,
"regularize": 0.5208728909492493,
"step": 260,
"wo_beta": 6.736252784729004
},
{
"dpo_loss": 0.4980500638484955,
"epoch": 0.7510628247520076,
"grad_norm": 23.39044199948042,
"learning_rate": 9.32460493430591e-07,
"logits": -1.7500866651535034,
"logps": -96.90016174316406,
"loss": 0.5023,
"objective": 0.4980500638484955,
"ranking_simple": 0.6583333611488342,
"regularize": 0.4980500638484955,
"step": 265,
"wo_beta": 6.221852779388428
},
{
"dpo_loss": 0.5561876893043518,
"epoch": 0.7652338214454416,
"grad_norm": 20.73702481860498,
"learning_rate": 9.282521182862629e-07,
"logits": -1.8606762886047363,
"logps": -96.93506622314453,
"loss": 0.5259,
"objective": 0.5561876893043518,
"ranking_simple": 0.550000011920929,
"regularize": 0.5561876893043518,
"step": 270,
"wo_beta": 6.527937412261963
},
{
"dpo_loss": 0.5460684299468994,
"epoch": 0.7794048181388757,
"grad_norm": 19.98766091687606,
"learning_rate": 9.239266632888658e-07,
"logits": -1.697192668914795,
"logps": -96.09431457519531,
"loss": 0.5114,
"objective": 0.5460684299468994,
"ranking_simple": 0.574999988079071,
"regularize": 0.5460684299468994,
"step": 275,
"wo_beta": 6.768658638000488
},
{
"dpo_loss": 0.47327375411987305,
"epoch": 0.7935758148323099,
"grad_norm": 21.02838944330306,
"learning_rate": 9.194853109746072e-07,
"logits": -1.8069101572036743,
"logps": -97.56507873535156,
"loss": 0.491,
"objective": 0.47327375411987305,
"ranking_simple": 0.625,
"regularize": 0.47327375411987305,
"step": 280,
"wo_beta": 5.885926723480225
},
{
"dpo_loss": 0.5277642607688904,
"epoch": 0.807746811525744,
"grad_norm": 22.871584835681148,
"learning_rate": 9.14929275564863e-07,
"logits": -1.8146883249282837,
"logps": -99.09355926513672,
"loss": 0.5087,
"objective": 0.5277642607688904,
"ranking_simple": 0.5791666507720947,
"regularize": 0.5277642607688904,
"step": 285,
"wo_beta": 7.316993236541748
},
{
"dpo_loss": 0.43708011507987976,
"epoch": 0.821917808219178,
"grad_norm": 25.529916734053177,
"learning_rate": 9.102598026342222e-07,
"logits": -1.8029001951217651,
"logps": -96.13628387451172,
"loss": 0.4688,
"objective": 0.43708011507987976,
"ranking_simple": 0.625,
"regularize": 0.43708011507987976,
"step": 290,
"wo_beta": 4.752199649810791
},
{
"dpo_loss": 0.49055105447769165,
"epoch": 0.8360888049126122,
"grad_norm": 26.014797277152265,
"learning_rate": 9.0547816876996e-07,
"logits": -1.8255597352981567,
"logps": -96.79292297363281,
"loss": 0.4938,
"objective": 0.49055105447769165,
"ranking_simple": 0.5708333253860474,
"regularize": 0.49055105447769165,
"step": 295,
"wo_beta": 6.931264400482178
},
{
"dpo_loss": 0.4934200048446655,
"epoch": 0.8502598016060463,
"grad_norm": 22.235912832826426,
"learning_rate": 9.005856812230304e-07,
"logits": -1.8234219551086426,
"logps": -98.131103515625,
"loss": 0.4987,
"objective": 0.4934200048446655,
"ranking_simple": 0.6541666388511658,
"regularize": 0.4934200048446655,
"step": 300,
"wo_beta": 4.583770275115967
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.7202683091163635,
"eval_logits": -1.9276690483093262,
"eval_logps": -110.09490966796875,
"eval_loss": 0.711872398853302,
"eval_objective": 0.7202683091163635,
"eval_ranking_simple": 0.5372670888900757,
"eval_regularize": 0.7202683091163635,
"eval_runtime": 369.1999,
"eval_samples_per_second": 15.683,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 8.926669120788574,
"step": 300
},
{
"dpo_loss": 0.5209147334098816,
"epoch": 0.8644307982994804,
"grad_norm": 21.742768863559323,
"learning_rate": 8.955836775506775e-07,
"logits": -1.8012293577194214,
"logps": -102.54885864257812,
"loss": 0.4739,
"objective": 0.5209147334098816,
"ranking_simple": 0.6333333253860474,
"regularize": 0.5209147334098816,
"step": 305,
"wo_beta": 5.649385929107666
},
{
"dpo_loss": 0.4898562729358673,
"epoch": 0.8786017949929145,
"grad_norm": 22.266286975377888,
"learning_rate": 8.904735252507609e-07,
"logits": -1.8041430711746216,
"logps": -101.15433502197266,
"loss": 0.488,
"objective": 0.4898562729358673,
"ranking_simple": 0.6499999761581421,
"regularize": 0.4898562729358673,
"step": 310,
"wo_beta": 6.073896408081055
},
{
"dpo_loss": 0.4698447585105896,
"epoch": 0.8927727916863486,
"grad_norm": 22.62826678390999,
"learning_rate": 8.852566213878946e-07,
"logits": -1.798747181892395,
"logps": -101.55271911621094,
"loss": 0.4839,
"objective": 0.4698447585105896,
"ranking_simple": 0.6083333492279053,
"regularize": 0.4698447585105896,
"step": 315,
"wo_beta": 5.962237358093262
},
{
"dpo_loss": 0.4936090409755707,
"epoch": 0.9069437883797827,
"grad_norm": 25.39576707145419,
"learning_rate": 8.799343922115043e-07,
"logits": -1.7406024932861328,
"logps": -110.01341247558594,
"loss": 0.5003,
"objective": 0.4936090409755707,
"ranking_simple": 0.6666666865348816,
"regularize": 0.4936090409755707,
"step": 320,
"wo_beta": 4.6922502517700195
},
{
"dpo_loss": 0.464478075504303,
"epoch": 0.9211147850732169,
"grad_norm": 22.63000678324253,
"learning_rate": 8.745082927659046e-07,
"logits": -1.8573758602142334,
"logps": -106.48271179199219,
"loss": 0.4601,
"objective": 0.464478075504303,
"ranking_simple": 0.637499988079071,
"regularize": 0.464478075504303,
"step": 325,
"wo_beta": 5.424474239349365
},
{
"dpo_loss": 0.4378047287464142,
"epoch": 0.9352857817666509,
"grad_norm": 26.572088004567764,
"learning_rate": 8.689798064925048e-07,
"logits": -1.6883081197738647,
"logps": -104.9384765625,
"loss": 0.4724,
"objective": 0.4378047287464142,
"ranking_simple": 0.6416666507720947,
"regularize": 0.4378047287464142,
"step": 330,
"wo_beta": 5.635779857635498
},
{
"dpo_loss": 0.4327344000339508,
"epoch": 0.949456778460085,
"grad_norm": 28.535014872524457,
"learning_rate": 8.633504448242504e-07,
"logits": -1.6694140434265137,
"logps": -107.3168716430664,
"loss": 0.4595,
"objective": 0.4327344000339508,
"ranking_simple": 0.6708333492279053,
"regularize": 0.4327344000339508,
"step": 335,
"wo_beta": 5.270318508148193
},
{
"dpo_loss": 0.4696439504623413,
"epoch": 0.9636277751535192,
"grad_norm": 29.414333709201422,
"learning_rate": 8.576217467724127e-07,
"logits": -1.7229362726211548,
"logps": -106.86974334716797,
"loss": 0.5044,
"objective": 0.4696439504623413,
"ranking_simple": 0.637499988079071,
"regularize": 0.4696439504623413,
"step": 340,
"wo_beta": 5.814812660217285
},
{
"dpo_loss": 0.43202081322669983,
"epoch": 0.9777987718469532,
"grad_norm": 21.299611385252437,
"learning_rate": 8.517952785058384e-07,
"logits": -1.7784336805343628,
"logps": -101.36027526855469,
"loss": 0.4585,
"objective": 0.43202081322669983,
"ranking_simple": 0.6958333253860474,
"regularize": 0.43202081322669983,
"step": 345,
"wo_beta": 4.479858875274658
},
{
"dpo_loss": 0.4188024699687958,
"epoch": 0.9919697685403873,
"grad_norm": 20.8027959046757,
"learning_rate": 8.458726329227747e-07,
"logits": -1.6966959238052368,
"logps": -99.99575805664062,
"loss": 0.4599,
"objective": 0.4188024699687958,
"ranking_simple": 0.762499988079071,
"regularize": 0.4188024699687958,
"step": 350,
"wo_beta": 4.140859603881836
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.6912401914596558,
"eval_logits": -1.8473907709121704,
"eval_logps": -104.9832763671875,
"eval_loss": 0.6885838508605957,
"eval_objective": 0.6912401914596558,
"eval_ranking_simple": 0.5351966619491577,
"eval_regularize": 0.6912401914596558,
"eval_runtime": 375.4224,
"eval_samples_per_second": 15.423,
"eval_steps_per_second": 1.287,
"eval_wo_beta": 8.374881744384766,
"step": 350
},
{
"dpo_loss": 0.4273616671562195,
"epoch": 1.0061407652338215,
"grad_norm": 19.801974743070023,
"learning_rate": 8.398554292153865e-07,
"logits": -1.9038132429122925,
"logps": -97.71568298339844,
"loss": 0.4198,
"objective": 0.4273616671562195,
"ranking_simple": 0.6708333492279053,
"regularize": 0.4273616671562195,
"step": 355,
"wo_beta": 4.851585388183594
},
{
"dpo_loss": 0.3563633859157562,
"epoch": 1.0203117619272555,
"grad_norm": 27.347143857547685,
"learning_rate": 8.337453124270862e-07,
"logits": -1.720730185508728,
"logps": -104.91397094726562,
"loss": 0.3668,
"objective": 0.3563633859157562,
"ranking_simple": 0.6958333253860474,
"regularize": 0.3563633859157562,
"step": 360,
"wo_beta": 4.480281352996826
},
{
"dpo_loss": 0.34869903326034546,
"epoch": 1.0344827586206897,
"grad_norm": 21.348824435063978,
"learning_rate": 8.275439530027947e-07,
"logits": -1.730131983757019,
"logps": -111.44975280761719,
"loss": 0.3663,
"objective": 0.34869903326034546,
"ranking_simple": 0.7041666507720947,
"regularize": 0.34869903326034546,
"step": 365,
"wo_beta": 3.829502820968628
},
{
"dpo_loss": 0.3821018934249878,
"epoch": 1.0486537553141237,
"grad_norm": 23.44162059864065,
"learning_rate": 8.212530463322582e-07,
"logits": -1.7056537866592407,
"logps": -104.91122436523438,
"loss": 0.3708,
"objective": 0.3821018934249878,
"ranking_simple": 0.6875,
"regularize": 0.3821018934249878,
"step": 370,
"wo_beta": 5.17144250869751
},
{
"dpo_loss": 0.3470642864704132,
"epoch": 1.0628247520075578,
"grad_norm": 21.682188897692722,
"learning_rate": 8.148743122865463e-07,
"logits": -1.7548179626464844,
"logps": -104.17964172363281,
"loss": 0.3665,
"objective": 0.3470642864704132,
"ranking_simple": 0.6875,
"regularize": 0.3470642864704132,
"step": 375,
"wo_beta": 3.6793243885040283
},
{
"dpo_loss": 0.35725000500679016,
"epoch": 1.076995748700992,
"grad_norm": 22.228388769880265,
"learning_rate": 8.084094947478554e-07,
"logits": -1.7486780881881714,
"logps": -107.26901245117188,
"loss": 0.3593,
"objective": 0.35725000500679016,
"ranking_simple": 0.7083333134651184,
"regularize": 0.35725000500679016,
"step": 380,
"wo_beta": 3.6982295513153076
},
{
"dpo_loss": 0.36463692784309387,
"epoch": 1.091166745394426,
"grad_norm": 21.586757686789323,
"learning_rate": 8.018603611327504e-07,
"logits": -1.6958861351013184,
"logps": -106.55477905273438,
"loss": 0.3642,
"objective": 0.36463692784309387,
"ranking_simple": 0.7041666507720947,
"regularize": 0.36463692784309387,
"step": 385,
"wo_beta": 4.191780090332031
},
{
"dpo_loss": 0.32307326793670654,
"epoch": 1.10533774208786,
"grad_norm": 21.44019149145441,
"learning_rate": 7.952287019089685e-07,
"logits": -1.6498711109161377,
"logps": -109.88943481445312,
"loss": 0.3408,
"objective": 0.32307326793670654,
"ranking_simple": 0.737500011920929,
"regularize": 0.32307326793670654,
"step": 390,
"wo_beta": 4.2303996086120605
},
{
"dpo_loss": 0.3521862328052521,
"epoch": 1.1195087387812943,
"grad_norm": 25.820266993638842,
"learning_rate": 7.88516330105925e-07,
"logits": -1.726596713066101,
"logps": -108.69520568847656,
"loss": 0.347,
"objective": 0.3521862328052521,
"ranking_simple": 0.7291666865348816,
"regularize": 0.3521862328052521,
"step": 395,
"wo_beta": 3.9551658630371094
},
{
"dpo_loss": 0.343461811542511,
"epoch": 1.1336797354747283,
"grad_norm": 22.721697115040456,
"learning_rate": 7.817250808190483e-07,
"logits": -1.8199702501296997,
"logps": -106.22103118896484,
"loss": 0.3498,
"objective": 0.343461811542511,
"ranking_simple": 0.7208333611488342,
"regularize": 0.343461811542511,
"step": 400,
"wo_beta": 3.51658034324646
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.751783549785614,
"eval_logits": -1.8806556463241577,
"eval_logps": -115.0888671875,
"eval_loss": 0.7462870478630066,
"eval_objective": 0.751783549785614,
"eval_ranking_simple": 0.5517598390579224,
"eval_regularize": 0.751783549785614,
"eval_runtime": 370.3425,
"eval_samples_per_second": 15.634,
"eval_steps_per_second": 1.304,
"eval_wo_beta": 9.550463676452637,
"step": 400
},
{
"dpo_loss": 0.3792721927165985,
"epoch": 1.1478507321681626,
"grad_norm": 32.9676442992408,
"learning_rate": 7.74856810708083e-07,
"logits": -1.7736860513687134,
"logps": -107.8192367553711,
"loss": 0.3694,
"objective": 0.3792721927165985,
"ranking_simple": 0.7208333611488342,
"regularize": 0.3792721927165985,
"step": 405,
"wo_beta": 5.114300727844238
},
{
"dpo_loss": 0.3683342933654785,
"epoch": 1.1620217288615966,
"grad_norm": 21.343924046898124,
"learning_rate": 7.679133974894982e-07,
"logits": -1.7912460565567017,
"logps": -105.35082244873047,
"loss": 0.3582,
"objective": 0.3683342933654785,
"ranking_simple": 0.7166666388511658,
"regularize": 0.3683342933654785,
"step": 410,
"wo_beta": 5.410634517669678
},
{
"dpo_loss": 0.3712156116962433,
"epoch": 1.1761927255550306,
"grad_norm": 20.631633414155974,
"learning_rate": 7.608967394231386e-07,
"logits": -1.7320811748504639,
"logps": -101.5705337524414,
"loss": 0.3623,
"objective": 0.3712156116962433,
"ranking_simple": 0.6958333253860474,
"regularize": 0.3712156116962433,
"step": 415,
"wo_beta": 5.24213171005249
},
{
"dpo_loss": 0.34116995334625244,
"epoch": 1.1903637222484649,
"grad_norm": 21.634750653170887,
"learning_rate": 7.538087547932584e-07,
"logits": -1.67872953414917,
"logps": -101.14533233642578,
"loss": 0.359,
"objective": 0.34116995334625244,
"ranking_simple": 0.6791666746139526,
"regularize": 0.34116995334625244,
"step": 420,
"wo_beta": 4.399470329284668
},
{
"dpo_loss": 0.3757858872413635,
"epoch": 1.204534718941899,
"grad_norm": 21.74672077991223,
"learning_rate": 7.466513813840824e-07,
"logits": -1.733936071395874,
"logps": -99.8553466796875,
"loss": 0.3472,
"objective": 0.3757858872413635,
"ranking_simple": 0.7083333134651184,
"regularize": 0.3757858872413635,
"step": 425,
"wo_beta": 4.1172003746032715
},
{
"dpo_loss": 0.3544313311576843,
"epoch": 1.2187057156353331,
"grad_norm": 25.498669816549643,
"learning_rate": 7.394265759500347e-07,
"logits": -1.7176556587219238,
"logps": -105.41299438476562,
"loss": 0.3569,
"objective": 0.3544313311576843,
"ranking_simple": 0.6958333253860474,
"regularize": 0.3544313311576843,
"step": 430,
"wo_beta": 4.10382604598999
},
{
"dpo_loss": 0.35303547978401184,
"epoch": 1.2328767123287672,
"grad_norm": 24.236679665592966,
"learning_rate": 7.321363136807818e-07,
"logits": -1.6603659391403198,
"logps": -107.16277313232422,
"loss": 0.3778,
"objective": 0.35303547978401184,
"ranking_simple": 0.75,
"regularize": 0.35303547978401184,
"step": 435,
"wo_beta": 3.8118536472320557
},
{
"dpo_loss": 0.32170751690864563,
"epoch": 1.2470477090222012,
"grad_norm": 25.528958844465567,
"learning_rate": 7.247825876612352e-07,
"logits": -1.6398621797561646,
"logps": -105.9729995727539,
"loss": 0.3429,
"objective": 0.32170751690864563,
"ranking_simple": 0.762499988079071,
"regularize": 0.32170751690864563,
"step": 440,
"wo_beta": 3.466229200363159
},
{
"dpo_loss": 0.35508811473846436,
"epoch": 1.2612187057156352,
"grad_norm": 20.894838905133128,
"learning_rate": 7.173674083266623e-07,
"logits": -1.6645927429199219,
"logps": -106.34298706054688,
"loss": 0.3613,
"objective": 0.35508811473846436,
"ranking_simple": 0.737500011920929,
"regularize": 0.35508811473846436,
"step": 445,
"wo_beta": 4.097968101501465
},
{
"dpo_loss": 0.32759609818458557,
"epoch": 1.2753897024090695,
"grad_norm": 21.051410520730908,
"learning_rate": 7.098928029130528e-07,
"logits": -1.8029848337173462,
"logps": -107.71712493896484,
"loss": 0.3361,
"objective": 0.32759609818458557,
"ranking_simple": 0.7583333253860474,
"regularize": 0.32759609818458557,
"step": 450,
"wo_beta": 3.633894443511963
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.7672637701034546,
"eval_logits": -1.8356177806854248,
"eval_logps": -116.80036926269531,
"eval_loss": 0.7562825679779053,
"eval_objective": 0.7672637701034546,
"eval_ranking_simple": 0.5419254899024963,
"eval_regularize": 0.7672637701034546,
"eval_runtime": 367.9761,
"eval_samples_per_second": 15.735,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 9.725224494934082,
"step": 450
},
{
"dpo_loss": 0.3631579279899597,
"epoch": 1.2895606991025035,
"grad_norm": 27.512782619434457,
"learning_rate": 7.023608149028936e-07,
"logits": -1.6034198999404907,
"logps": -110.81619262695312,
"loss": 0.3689,
"objective": 0.3631579279899597,
"ranking_simple": 0.6708333492279053,
"regularize": 0.3631579279899597,
"step": 455,
"wo_beta": 4.727202892303467
},
{
"dpo_loss": 0.3907562792301178,
"epoch": 1.3037316957959377,
"grad_norm": 24.168580409550643,
"learning_rate": 6.947735034665001e-07,
"logits": -1.7300904989242554,
"logps": -108.0628890991211,
"loss": 0.3712,
"objective": 0.3907562792301178,
"ranking_simple": 0.6583333611488342,
"regularize": 0.3907562792301178,
"step": 460,
"wo_beta": 5.317975997924805
},
{
"dpo_loss": 0.3442947268486023,
"epoch": 1.3179026924893718,
"grad_norm": 22.349252886711223,
"learning_rate": 6.871329428990601e-07,
"logits": -1.7600762844085693,
"logps": -102.89717102050781,
"loss": 0.366,
"objective": 0.3442947268486023,
"ranking_simple": 0.7166666388511658,
"regularize": 0.3442947268486023,
"step": 465,
"wo_beta": 4.05583381652832
},
{
"dpo_loss": 0.36818769574165344,
"epoch": 1.3320736891828058,
"grad_norm": 21.171077905427786,
"learning_rate": 6.794412220535425e-07,
"logits": -1.8482578992843628,
"logps": -100.47290802001953,
"loss": 0.3612,
"objective": 0.36818769574165344,
"ranking_simple": 0.7250000238418579,
"regularize": 0.36818769574165344,
"step": 470,
"wo_beta": 3.844135046005249
},
{
"dpo_loss": 0.3290488123893738,
"epoch": 1.34624468587624,
"grad_norm": 23.17750388094903,
"learning_rate": 6.717004437696249e-07,
"logits": -1.6789878606796265,
"logps": -102.9361572265625,
"loss": 0.3486,
"objective": 0.3290488123893738,
"ranking_simple": 0.7541666626930237,
"regularize": 0.3290488123893738,
"step": 475,
"wo_beta": 3.849888324737549
},
{
"dpo_loss": 0.3331288993358612,
"epoch": 1.360415682569674,
"grad_norm": 31.57784191234236,
"learning_rate": 6.639127242987987e-07,
"logits": -1.7997510433197021,
"logps": -105.81382751464844,
"loss": 0.3418,
"objective": 0.3331288993358612,
"ranking_simple": 0.7124999761581421,
"regularize": 0.3331288993358612,
"step": 480,
"wo_beta": 5.502153396606445
},
{
"dpo_loss": 0.3518763482570648,
"epoch": 1.3745866792631083,
"grad_norm": 25.72021920875409,
"learning_rate": 6.560801927258079e-07,
"logits": -1.763397455215454,
"logps": -105.35774993896484,
"loss": 0.3606,
"objective": 0.3518763482570648,
"ranking_simple": 0.7416666746139526,
"regularize": 0.3518763482570648,
"step": 485,
"wo_beta": 4.792428016662598
},
{
"dpo_loss": 0.3617197871208191,
"epoch": 1.3887576759565423,
"grad_norm": 22.21535306569678,
"learning_rate": 6.482049903865768e-07,
"logits": -1.7518786191940308,
"logps": -109.02607727050781,
"loss": 0.3414,
"objective": 0.3617197871208191,
"ranking_simple": 0.7124999761581421,
"regularize": 0.3617197871208191,
"step": 490,
"wo_beta": 3.807429790496826
},
{
"dpo_loss": 0.33107537031173706,
"epoch": 1.4029286726499763,
"grad_norm": 28.27182569867929,
"learning_rate": 6.402892702827916e-07,
"logits": -1.7524651288986206,
"logps": -110.29097747802734,
"loss": 0.3453,
"objective": 0.33107537031173706,
"ranking_simple": 0.7083333134651184,
"regularize": 0.33107537031173706,
"step": 495,
"wo_beta": 4.099748611450195
},
{
"dpo_loss": 0.3612217307090759,
"epoch": 1.4170996693434104,
"grad_norm": 26.907643867295747,
"learning_rate": 6.323351964932908e-07,
"logits": -1.6837791204452515,
"logps": -109.02066802978516,
"loss": 0.3584,
"objective": 0.3612217307090759,
"ranking_simple": 0.7041666507720947,
"regularize": 0.3612217307090759,
"step": 500,
"wo_beta": 3.91571307182312
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7694771885871887,
"eval_logits": -1.8626275062561035,
"eval_logps": -117.51673126220703,
"eval_loss": 0.7635300159454346,
"eval_objective": 0.7694771885871887,
"eval_ranking_simple": 0.5419254899024963,
"eval_regularize": 0.7694771885871887,
"eval_runtime": 370.4437,
"eval_samples_per_second": 15.63,
"eval_steps_per_second": 1.304,
"eval_wo_beta": 9.631916999816895,
"step": 500
},
{
"dpo_loss": 0.2975477874279022,
"epoch": 1.4312706660368446,
"grad_norm": 22.51215190081199,
"learning_rate": 6.243449435824276e-07,
"logits": -1.7515524625778198,
"logps": -109.30506134033203,
"loss": 0.3308,
"objective": 0.2975477874279022,
"ranking_simple": 0.75,
"regularize": 0.2975477874279022,
"step": 505,
"wo_beta": 3.399846076965332
},
{
"dpo_loss": 0.3239024877548218,
"epoch": 1.4454416627302786,
"grad_norm": 25.756520666755268,
"learning_rate": 6.163206960055652e-07,
"logits": -1.7505611181259155,
"logps": -108.20401000976562,
"loss": 0.3502,
"objective": 0.3239024877548218,
"ranking_simple": 0.75,
"regularize": 0.3239024877548218,
"step": 510,
"wo_beta": 3.145045042037964
},
{
"dpo_loss": 0.3271316587924957,
"epoch": 1.4596126594237129,
"grad_norm": 22.00239796246049,
"learning_rate": 6.082646475118699e-07,
"logits": -1.8232632875442505,
"logps": -107.82081604003906,
"loss": 0.3356,
"objective": 0.3271316587924957,
"ranking_simple": 0.6708333492279053,
"regularize": 0.3271316587924957,
"step": 515,
"wo_beta": 4.939964771270752
},
{
"dpo_loss": 0.30039647221565247,
"epoch": 1.473783656117147,
"grad_norm": 22.8094809920846,
"learning_rate": 6.001790005445606e-07,
"logits": -1.6817113161087036,
"logps": -106.67537689208984,
"loss": 0.3092,
"objective": 0.30039647221565247,
"ranking_simple": 0.7541666626930237,
"regularize": 0.30039647221565247,
"step": 520,
"wo_beta": 3.6650784015655518
},
{
"dpo_loss": 0.34032100439071655,
"epoch": 1.487954652810581,
"grad_norm": 25.538184332619608,
"learning_rate": 5.920659656387836e-07,
"logits": -1.5860577821731567,
"logps": -107.61659240722656,
"loss": 0.3466,
"objective": 0.34032100439071655,
"ranking_simple": 0.75,
"regularize": 0.34032100439071655,
"step": 525,
"wo_beta": 4.031210422515869
},
{
"dpo_loss": 0.3295021951198578,
"epoch": 1.5021256495040152,
"grad_norm": 22.986849455924027,
"learning_rate": 5.839277608172738e-07,
"logits": -1.743402123451233,
"logps": -111.25679016113281,
"loss": 0.3262,
"objective": 0.3295021951198578,
"ranking_simple": 0.737500011920929,
"regularize": 0.3295021951198578,
"step": 530,
"wo_beta": 3.9925944805145264
},
{
"dpo_loss": 0.29601436853408813,
"epoch": 1.5162966461974492,
"grad_norm": 25.044023904201577,
"learning_rate": 5.757666109839702e-07,
"logits": -1.7071605920791626,
"logps": -112.61083984375,
"loss": 0.3088,
"objective": 0.29601436853408813,
"ranking_simple": 0.7541666626930237,
"regularize": 0.29601436853408813,
"step": 535,
"wo_beta": 3.535855531692505
},
{
"dpo_loss": 0.350533127784729,
"epoch": 1.5304676428908834,
"grad_norm": 22.77917543570306,
"learning_rate": 5.675847473157485e-07,
"logits": -1.6420516967773438,
"logps": -114.60887145996094,
"loss": 0.338,
"objective": 0.350533127784729,
"ranking_simple": 0.7708333134651184,
"regularize": 0.350533127784729,
"step": 540,
"wo_beta": 3.302687644958496
},
{
"dpo_loss": 0.31907784938812256,
"epoch": 1.5446386395843175,
"grad_norm": 27.52281539562519,
"learning_rate": 5.5938440665244e-07,
"logits": -1.7368324995040894,
"logps": -117.18190002441406,
"loss": 0.3311,
"objective": 0.31907784938812256,
"ranking_simple": 0.7875000238418579,
"regularize": 0.31907784938812256,
"step": 545,
"wo_beta": 2.7296268939971924
},
{
"dpo_loss": 0.3072589933872223,
"epoch": 1.5588096362777515,
"grad_norm": 24.90909396316388,
"learning_rate": 5.511678308853025e-07,
"logits": -1.7334787845611572,
"logps": -118.51893615722656,
"loss": 0.3343,
"objective": 0.3072589933872223,
"ranking_simple": 0.7208333611488342,
"regularize": 0.3072589933872223,
"step": 550,
"wo_beta": 3.902778148651123
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.7814171314239502,
"eval_logits": -1.8209288120269775,
"eval_logps": -123.38627624511719,
"eval_loss": 0.7698224186897278,
"eval_objective": 0.7814171314239502,
"eval_ranking_simple": 0.5351966619491577,
"eval_regularize": 0.7814171314239502,
"eval_runtime": 367.7587,
"eval_samples_per_second": 15.744,
"eval_steps_per_second": 1.313,
"eval_wo_beta": 9.825753211975098,
"step": 550
},
{
"dpo_loss": 0.34100914001464844,
"epoch": 1.5729806329711855,
"grad_norm": 24.299530028848974,
"learning_rate": 5.429372663441085e-07,
"logits": -1.5191646814346313,
"logps": -116.5060806274414,
"loss": 0.339,
"objective": 0.34100914001464844,
"ranking_simple": 0.737500011920929,
"regularize": 0.34100914001464844,
"step": 555,
"wo_beta": 3.8065884113311768
},
{
"dpo_loss": 0.3122609257698059,
"epoch": 1.5871516296646198,
"grad_norm": 26.61780255104318,
"learning_rate": 5.34694963183022e-07,
"logits": -1.5900938510894775,
"logps": -112.42914581298828,
"loss": 0.334,
"objective": 0.3122609257698059,
"ranking_simple": 0.7666666507720947,
"regularize": 0.3122609257698059,
"step": 560,
"wo_beta": 4.191233158111572
},
{
"dpo_loss": 0.3086826205253601,
"epoch": 1.601322626358054,
"grad_norm": 23.97171206395212,
"learning_rate": 5.264431747654283e-07,
"logits": -1.5865463018417358,
"logps": -112.55115509033203,
"loss": 0.334,
"objective": 0.3086826205253601,
"ranking_simple": 0.7333333492279053,
"regularize": 0.3086826205253601,
"step": 565,
"wo_beta": 3.760244846343994
},
{
"dpo_loss": 0.3280898928642273,
"epoch": 1.615493623051488,
"grad_norm": 26.790227869518926,
"learning_rate": 5.181841570478872e-07,
"logits": -1.7293529510498047,
"logps": -114.50617980957031,
"loss": 0.3065,
"objective": 0.3280898928642273,
"ranking_simple": 0.7166666388511658,
"regularize": 0.3280898928642273,
"step": 570,
"wo_beta": 4.093240737915039
},
{
"dpo_loss": 0.356030136346817,
"epoch": 1.629664619744922,
"grad_norm": 29.38280925874381,
"learning_rate": 5.099201679633768e-07,
"logits": -1.7021836042404175,
"logps": -112.18016052246094,
"loss": 0.3442,
"objective": 0.356030136346817,
"ranking_simple": 0.7458333373069763,
"regularize": 0.356030136346817,
"step": 575,
"wo_beta": 3.8104941844940186
},
{
"dpo_loss": 0.2992390990257263,
"epoch": 1.643835616438356,
"grad_norm": 24.710654774900764,
"learning_rate": 5.016534668039976e-07,
"logits": -1.73283052444458,
"logps": -111.73848724365234,
"loss": 0.3113,
"objective": 0.2992390990257263,
"ranking_simple": 0.762499988079071,
"regularize": 0.2992390990257263,
"step": 580,
"wo_beta": 2.8948888778686523
},
{
"dpo_loss": 0.30101045966148376,
"epoch": 1.6580066131317903,
"grad_norm": 25.736134751129057,
"learning_rate": 4.933863136033039e-07,
"logits": -1.5684159994125366,
"logps": -111.7412109375,
"loss": 0.3245,
"objective": 0.30101045966148376,
"ranking_simple": 0.7124999761581421,
"regularize": 0.30101045966148376,
"step": 585,
"wo_beta": 3.5469541549682617
},
{
"dpo_loss": 0.29352518916130066,
"epoch": 1.6721776098252243,
"grad_norm": 28.292224202430326,
"learning_rate": 4.851209685184338e-07,
"logits": -1.6094284057617188,
"logps": -114.35250854492188,
"loss": 0.3264,
"objective": 0.29352518916130066,
"ranking_simple": 0.7458333373069763,
"regularize": 0.29352518916130066,
"step": 590,
"wo_beta": 3.287980794906616
},
{
"dpo_loss": 0.32477742433547974,
"epoch": 1.6863486065186586,
"grad_norm": 26.952229320357798,
"learning_rate": 4.768596912122045e-07,
"logits": -1.5896912813186646,
"logps": -114.52155303955078,
"loss": 0.3252,
"objective": 0.32477742433547974,
"ranking_simple": 0.7416666746139526,
"regularize": 0.32477742433547974,
"step": 595,
"wo_beta": 4.611125469207764
},
{
"dpo_loss": 0.2708142399787903,
"epoch": 1.7005196032120926,
"grad_norm": 26.06064163090054,
"learning_rate": 4.686047402353433e-07,
"logits": -1.6145151853561401,
"logps": -114.16989135742188,
"loss": 0.3105,
"objective": 0.2708142399787903,
"ranking_simple": 0.7708333134651184,
"regularize": 0.2708142399787903,
"step": 600,
"wo_beta": 3.207538604736328
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.7761210799217224,
"eval_logits": -1.7865931987762451,
"eval_logps": -119.82308959960938,
"eval_loss": 0.7679409980773926,
"eval_objective": 0.7761210799217224,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.7761210799217224,
"eval_runtime": 372.6791,
"eval_samples_per_second": 15.536,
"eval_steps_per_second": 1.296,
"eval_wo_beta": 9.803099632263184,
"step": 600
},
{
"dpo_loss": 0.32439085841178894,
"epoch": 1.7146905999055266,
"grad_norm": 25.8504927528126,
"learning_rate": 4.60358372409022e-07,
"logits": -1.602834701538086,
"logps": -112.0418472290039,
"loss": 0.3292,
"objective": 0.32439085841178894,
"ranking_simple": 0.7250000238418579,
"regularize": 0.32439085841178894,
"step": 605,
"wo_beta": 4.832021713256836
},
{
"dpo_loss": 0.36444205045700073,
"epoch": 1.7288615965989607,
"grad_norm": 26.17653629180954,
"learning_rate": 4.521228422078649e-07,
"logits": -1.6547633409500122,
"logps": -110.03471374511719,
"loss": 0.3239,
"objective": 0.36444205045700073,
"ranking_simple": 0.75,
"regularize": 0.36444205045700073,
"step": 610,
"wo_beta": 4.15641975402832
},
{
"dpo_loss": 0.3479357659816742,
"epoch": 1.743032593292395,
"grad_norm": 25.45060113638119,
"learning_rate": 4.439004011435979e-07,
"logits": -1.6919087171554565,
"logps": -109.51602172851562,
"loss": 0.3279,
"objective": 0.3479357659816742,
"ranking_simple": 0.7250000238418579,
"regularize": 0.3479357659816742,
"step": 615,
"wo_beta": 4.716867446899414
},
{
"dpo_loss": 0.33088982105255127,
"epoch": 1.7572035899858292,
"grad_norm": 26.887147654271335,
"learning_rate": 4.3569329714950703e-07,
"logits": -1.7291035652160645,
"logps": -109.82331085205078,
"loss": 0.3425,
"objective": 0.33088982105255127,
"ranking_simple": 0.7583333253860474,
"regularize": 0.33088982105255127,
"step": 620,
"wo_beta": 3.5512850284576416
},
{
"dpo_loss": 0.3090989291667938,
"epoch": 1.7713745866792632,
"grad_norm": 26.389383350627952,
"learning_rate": 4.275037739658771e-07,
"logits": -1.590990424156189,
"logps": -110.23678588867188,
"loss": 0.3234,
"objective": 0.3090989291667938,
"ranking_simple": 0.7250000238418579,
"regularize": 0.3090989291667938,
"step": 625,
"wo_beta": 3.666703462600708
},
{
"dpo_loss": 0.33155128359794617,
"epoch": 1.7855455833726972,
"grad_norm": 29.336966725897334,
"learning_rate": 4.193340705265745e-07,
"logits": -1.635860562324524,
"logps": -114.24417114257812,
"loss": 0.3204,
"objective": 0.33155128359794617,
"ranking_simple": 0.7083333134651184,
"regularize": 0.33155128359794617,
"step": 630,
"wo_beta": 4.568333625793457
},
{
"dpo_loss": 0.3347407877445221,
"epoch": 1.7997165800661312,
"grad_norm": 31.313014707447742,
"learning_rate": 4.1118642034694565e-07,
"logits": -1.7257325649261475,
"logps": -116.03430938720703,
"loss": 0.3376,
"objective": 0.3347407877445221,
"ranking_simple": 0.7250000238418579,
"regularize": 0.3347407877445221,
"step": 635,
"wo_beta": 4.352345943450928
},
{
"dpo_loss": 0.30713585019111633,
"epoch": 1.8138875767595655,
"grad_norm": 25.70905071821337,
"learning_rate": 4.030630509131959e-07,
"logits": -1.6731877326965332,
"logps": -116.4244155883789,
"loss": 0.3098,
"objective": 0.30713585019111633,
"ranking_simple": 0.7583333253860474,
"regularize": 0.30713585019111633,
"step": 640,
"wo_beta": 4.2585673332214355
},
{
"dpo_loss": 0.29875513911247253,
"epoch": 1.8280585734529995,
"grad_norm": 23.791400324977506,
"learning_rate": 3.9496618307341713e-07,
"logits": -1.7462607622146606,
"logps": -116.83419036865234,
"loss": 0.3005,
"objective": 0.29875513911247253,
"ranking_simple": 0.7791666388511658,
"regularize": 0.29875513911247253,
"step": 645,
"wo_beta": 3.3428430557250977
},
{
"dpo_loss": 0.32320088148117065,
"epoch": 1.8422295701464337,
"grad_norm": 32.03839388955998,
"learning_rate": 3.8689803043042996e-07,
"logits": -1.8062669038772583,
"logps": -117.27250671386719,
"loss": 0.3412,
"objective": 0.32320088148117065,
"ranking_simple": 0.7583333253860474,
"regularize": 0.32320088148117065,
"step": 650,
"wo_beta": 3.019827127456665
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.7847943305969238,
"eval_logits": -1.832274079322815,
"eval_logps": -122.29435729980469,
"eval_loss": 0.7749964594841003,
"eval_objective": 0.7847943305969238,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.7847943305969238,
"eval_runtime": 369.3337,
"eval_samples_per_second": 15.677,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 9.94935417175293,
"step": 650
},
{
"dpo_loss": 0.3198917508125305,
"epoch": 1.8564005668398678,
"grad_norm": 24.453465336538663,
"learning_rate": 3.788607987366069e-07,
"logits": -1.6524808406829834,
"logps": -113.18647766113281,
"loss": 0.333,
"objective": 0.3198917508125305,
"ranking_simple": 0.737500011920929,
"regularize": 0.3198917508125305,
"step": 655,
"wo_beta": 3.779681444168091
},
{
"dpo_loss": 0.30827051401138306,
"epoch": 1.8705715635333018,
"grad_norm": 25.30249944675523,
"learning_rate": 3.708566852908418e-07,
"logits": -1.722990870475769,
"logps": -117.13253021240234,
"loss": 0.3093,
"objective": 0.30827051401138306,
"ranking_simple": 0.7250000238418579,
"regularize": 0.30827051401138306,
"step": 660,
"wo_beta": 3.8630990982055664
},
{
"dpo_loss": 0.30362004041671753,
"epoch": 1.8847425602267358,
"grad_norm": 26.577906210584395,
"learning_rate": 3.6288787833783016e-07,
"logits": -1.6925681829452515,
"logps": -114.92183685302734,
"loss": 0.3287,
"objective": 0.30362004041671753,
"ranking_simple": 0.7749999761581421,
"regularize": 0.30362004041671753,
"step": 665,
"wo_beta": 3.2407257556915283
},
{
"dpo_loss": 0.33854812383651733,
"epoch": 1.89891355692017,
"grad_norm": 26.608491612605135,
"learning_rate": 3.5495655646982503e-07,
"logits": -1.632662057876587,
"logps": -113.74718475341797,
"loss": 0.3206,
"objective": 0.33854812383651733,
"ranking_simple": 0.7124999761581421,
"regularize": 0.33854812383651733,
"step": 670,
"wo_beta": 4.751885890960693
},
{
"dpo_loss": 0.30450791120529175,
"epoch": 1.9130845536136043,
"grad_norm": 27.707963800825,
"learning_rate": 3.470648880310313e-07,
"logits": -1.7001540660858154,
"logps": -112.93498992919922,
"loss": 0.3286,
"objective": 0.30450791120529175,
"ranking_simple": 0.800000011920929,
"regularize": 0.30450791120529175,
"step": 675,
"wo_beta": 2.8013789653778076
},
{
"dpo_loss": 0.29233846068382263,
"epoch": 1.9272555503070383,
"grad_norm": 26.318074274927174,
"learning_rate": 3.3921503052480236e-07,
"logits": -1.7435904741287231,
"logps": -115.07746887207031,
"loss": 0.3108,
"objective": 0.29233846068382263,
"ranking_simple": 0.7833333611488342,
"regularize": 0.29233846068382263,
"step": 680,
"wo_beta": 3.6125741004943848
},
{
"dpo_loss": 0.2992376685142517,
"epoch": 1.9414265470004723,
"grad_norm": 26.980724381093548,
"learning_rate": 3.314091300237999e-07,
"logits": -1.6790062189102173,
"logps": -115.0765380859375,
"loss": 0.2818,
"objective": 0.2992376685142517,
"ranking_simple": 0.7333333492279053,
"regularize": 0.2992376685142517,
"step": 685,
"wo_beta": 4.020833969116211
},
{
"dpo_loss": 0.2989169955253601,
"epoch": 1.9555975436939064,
"grad_norm": 28.956498459281256,
"learning_rate": 3.236493205832794e-07,
"logits": -1.7138378620147705,
"logps": -116.5325927734375,
"loss": 0.3112,
"objective": 0.2989169955253601,
"ranking_simple": 0.762499988079071,
"regularize": 0.2989169955253601,
"step": 690,
"wo_beta": 3.235595941543579
},
{
"dpo_loss": 0.3340277373790741,
"epoch": 1.9697685403873406,
"grad_norm": 29.008978725819244,
"learning_rate": 3.15937723657661e-07,
"logits": -1.5779744386672974,
"logps": -117.33056640625,
"loss": 0.3178,
"objective": 0.3340277373790741,
"ranking_simple": 0.7416666746139526,
"regularize": 0.3340277373790741,
"step": 695,
"wo_beta": 4.477265357971191
},
{
"dpo_loss": 0.3072899878025055,
"epoch": 1.9839395370807746,
"grad_norm": 26.91852361479357,
"learning_rate": 3.082764475205442e-07,
"logits": -1.5716139078140259,
"logps": -120.74311065673828,
"loss": 0.3156,
"objective": 0.3072899878025055,
"ranking_simple": 0.7583333253860474,
"regularize": 0.3072899878025055,
"step": 700,
"wo_beta": 3.845552682876587
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.813927948474884,
"eval_logits": -1.8338414430618286,
"eval_logps": -126.39391326904297,
"eval_loss": 0.8013313412666321,
"eval_objective": 0.813927948474884,
"eval_ranking_simple": 0.5377846956253052,
"eval_regularize": 0.813927948474884,
"eval_runtime": 375.3584,
"eval_samples_per_second": 15.425,
"eval_steps_per_second": 1.287,
"eval_wo_beta": 10.32465934753418,
"step": 700
},
{
"dpo_loss": 0.3367713391780853,
"epoch": 1.9981105337742089,
"grad_norm": 30.634677985646626,
"learning_rate": 3.006675866883275e-07,
"logits": -1.486984133720398,
"logps": -120.58149719238281,
"loss": 0.3202,
"objective": 0.3367713391780853,
"ranking_simple": 0.699999988079071,
"regularize": 0.3367713391780853,
"step": 705,
"wo_beta": 5.604025840759277
},
{
"dpo_loss": 0.26506486535072327,
"epoch": 2.012281530467643,
"grad_norm": 21.811542358240928,
"learning_rate": 2.931132213475884e-07,
"logits": -1.652250051498413,
"logps": -117.85489654541016,
"loss": 0.247,
"objective": 0.26506486535072327,
"ranking_simple": 0.7958333492279053,
"regularize": 0.26506486535072327,
"step": 710,
"wo_beta": 2.941725015640259
},
{
"dpo_loss": 0.21083328127861023,
"epoch": 2.026452527161077,
"grad_norm": 22.009413556660768,
"learning_rate": 2.856154167863814e-07,
"logits": -1.7095146179199219,
"logps": -118.10084533691406,
"loss": 0.2213,
"objective": 0.21083328127861023,
"ranking_simple": 0.8166666626930237,
"regularize": 0.21083328127861023,
"step": 715,
"wo_beta": 1.996678113937378
},
{
"dpo_loss": 0.1858675181865692,
"epoch": 2.040623523854511,
"grad_norm": 19.31295851177097,
"learning_rate": 2.7817622282960813e-07,
"logits": -1.6148954629898071,
"logps": -123.07051086425781,
"loss": 0.2017,
"objective": 0.1858675181865692,
"ranking_simple": 0.824999988079071,
"regularize": 0.1858675181865692,
"step": 720,
"wo_beta": 2.2542519569396973
},
{
"dpo_loss": 0.2097151279449463,
"epoch": 2.0547945205479454,
"grad_norm": 24.65512776088815,
"learning_rate": 2.707976732786166e-07,
"logits": -1.686496376991272,
"logps": -126.76118469238281,
"loss": 0.2047,
"objective": 0.2097151279449463,
"ranking_simple": 0.8083333373069763,
"regularize": 0.2097151279449463,
"step": 725,
"wo_beta": 2.5733823776245117
},
{
"dpo_loss": 0.23622439801692963,
"epoch": 2.0689655172413794,
"grad_norm": 27.027861051451126,
"learning_rate": 2.6348178535517965e-07,
"logits": -1.7227706909179688,
"logps": -126.69216918945312,
"loss": 0.2175,
"objective": 0.23622439801692963,
"ranking_simple": 0.7749999761581421,
"regularize": 0.23622439801692963,
"step": 730,
"wo_beta": 3.341784715652466
},
{
"dpo_loss": 0.23194490373134613,
"epoch": 2.0831365139348135,
"grad_norm": 29.247659239837198,
"learning_rate": 2.5623055915000686e-07,
"logits": -1.6128872632980347,
"logps": -126.96920776367188,
"loss": 0.2248,
"objective": 0.23194490373134613,
"ranking_simple": 0.7875000238418579,
"regularize": 0.23194490373134613,
"step": 735,
"wo_beta": 3.5192384719848633
},
{
"dpo_loss": 0.2231958657503128,
"epoch": 2.0973075106282475,
"grad_norm": 25.19144911793584,
"learning_rate": 2.490459770759398e-07,
"logits": -1.6792032718658447,
"logps": -123.05906677246094,
"loss": 0.2328,
"objective": 0.2231958657503128,
"ranking_simple": 0.8125,
"regularize": 0.2231958657503128,
"step": 740,
"wo_beta": 2.758615016937256
},
{
"dpo_loss": 0.20981092751026154,
"epoch": 2.1114785073216815,
"grad_norm": 23.2038862143533,
"learning_rate": 2.419300033259798e-07,
"logits": -1.6360180377960205,
"logps": -124.81270599365234,
"loss": 0.2182,
"objective": 0.20981092751026154,
"ranking_simple": 0.7791666388511658,
"regularize": 0.20981092751026154,
"step": 745,
"wo_beta": 3.1798312664031982
},
{
"dpo_loss": 0.22788210213184357,
"epoch": 2.1256495040151155,
"grad_norm": 22.72550221909295,
"learning_rate": 2.3488458333629773e-07,
"logits": -1.6554747819900513,
"logps": -122.24414825439453,
"loss": 0.2183,
"objective": 0.22788210213184357,
"ranking_simple": 0.8041666746139526,
"regularize": 0.22788210213184357,
"step": 750,
"wo_beta": 2.6969196796417236
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 0.8603518009185791,
"eval_logits": -1.7998822927474976,
"eval_logps": -131.12570190429688,
"eval_loss": 0.8466524481773376,
"eval_objective": 0.8603518009185791,
"eval_ranking_simple": 0.5351966619491577,
"eval_regularize": 0.8603518009185791,
"eval_runtime": 370.8383,
"eval_samples_per_second": 15.613,
"eval_steps_per_second": 1.302,
"eval_wo_beta": 10.893115997314453,
"step": 750
},
{
"dpo_loss": 0.2121828943490982,
"epoch": 2.13982050070855,
"grad_norm": 20.710190502520984,
"learning_rate": 2.2791164325437046e-07,
"logits": -1.6399970054626465,
"logps": -126.25770568847656,
"loss": 0.2128,
"objective": 0.2121828943490982,
"ranking_simple": 0.8125,
"regularize": 0.2121828943490982,
"step": 755,
"wo_beta": 3.0046989917755127
},
{
"dpo_loss": 0.24169200658798218,
"epoch": 2.153991497401984,
"grad_norm": 22.67213482006544,
"learning_rate": 2.21013089412392e-07,
"logits": -1.6009403467178345,
"logps": -119.49600219726562,
"loss": 0.2344,
"objective": 0.24169200658798218,
"ranking_simple": 0.7916666865348816,
"regularize": 0.24169200658798218,
"step": 760,
"wo_beta": 2.758802890777588
},
{
"dpo_loss": 0.2243640273809433,
"epoch": 2.168162494095418,
"grad_norm": 20.64816775047569,
"learning_rate": 2.1419080780610122e-07,
"logits": -1.6553146839141846,
"logps": -120.87618255615234,
"loss": 0.2361,
"objective": 0.2243640273809433,
"ranking_simple": 0.8208333253860474,
"regularize": 0.2243640273809433,
"step": 765,
"wo_beta": 2.57806396484375
},
{
"dpo_loss": 0.2182272970676422,
"epoch": 2.182333490788852,
"grad_norm": 24.316819536544447,
"learning_rate": 2.0744666357916925e-07,
"logits": -1.6933544874191284,
"logps": -120.17996978759766,
"loss": 0.2274,
"objective": 0.2182272970676422,
"ranking_simple": 0.7916666865348816,
"regularize": 0.2182272970676422,
"step": 770,
"wo_beta": 2.544562816619873
},
{
"dpo_loss": 0.20803479850292206,
"epoch": 2.196504487482286,
"grad_norm": 23.36541458579555,
"learning_rate": 2.0078250051328782e-07,
"logits": -1.6226826906204224,
"logps": -122.02224731445312,
"loss": 0.2183,
"objective": 0.20803479850292206,
"ranking_simple": 0.8208333253860474,
"regularize": 0.20803479850292206,
"step": 775,
"wo_beta": 2.7029988765716553
},
{
"dpo_loss": 0.22727744281291962,
"epoch": 2.21067548417572,
"grad_norm": 24.538113581222706,
"learning_rate": 1.942001405240979e-07,
"logits": -1.5558067560195923,
"logps": -124.58464813232422,
"loss": 0.2171,
"objective": 0.22727744281291962,
"ranking_simple": 0.7541666626930237,
"regularize": 0.22727744281291962,
"step": 780,
"wo_beta": 3.0669195652008057
},
{
"dpo_loss": 0.21583274006843567,
"epoch": 2.2248464808691546,
"grad_norm": 28.544460286246334,
"learning_rate": 1.877013831630961e-07,
"logits": -1.5640733242034912,
"logps": -123.09439849853516,
"loss": 0.2152,
"objective": 0.21583274006843567,
"ranking_simple": 0.7958333492279053,
"regularize": 0.21583274006843567,
"step": 785,
"wo_beta": 3.0127415657043457
},
{
"dpo_loss": 0.1936068832874298,
"epoch": 2.2390174775625886,
"grad_norm": 23.5808144420256,
"learning_rate": 1.812880051256551e-07,
"logits": -1.5408331155776978,
"logps": -125.1629638671875,
"loss": 0.2122,
"objective": 0.1936068832874298,
"ranking_simple": 0.8583333492279053,
"regularize": 0.1936068832874298,
"step": 790,
"wo_beta": 2.2015647888183594
},
{
"dpo_loss": 0.21609367430210114,
"epoch": 2.2531884742560226,
"grad_norm": 23.539955858336363,
"learning_rate": 1.7496175976529337e-07,
"logits": -1.6351306438446045,
"logps": -125.9599609375,
"loss": 0.2139,
"objective": 0.21609367430210114,
"ranking_simple": 0.8374999761581421,
"regularize": 0.21609367430210114,
"step": 795,
"wo_beta": 2.052849292755127
},
{
"dpo_loss": 0.2304428219795227,
"epoch": 2.2673594709494567,
"grad_norm": 30.254441621642492,
"learning_rate": 1.6872437661432516e-07,
"logits": -1.6573865413665771,
"logps": -127.17088317871094,
"loss": 0.2338,
"objective": 0.2304428219795227,
"ranking_simple": 0.8291666507720947,
"regularize": 0.2304428219795227,
"step": 800,
"wo_beta": 2.813727617263794
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.864130973815918,
"eval_logits": -1.8069664239883423,
"eval_logps": -132.1160125732422,
"eval_loss": 0.8479817509651184,
"eval_objective": 0.864130973815918,
"eval_ranking_simple": 0.5351966619491577,
"eval_regularize": 0.864130973815918,
"eval_runtime": 369.7595,
"eval_samples_per_second": 15.659,
"eval_steps_per_second": 1.306,
"eval_wo_beta": 10.98104476928711,
"step": 800
},
{
"dpo_loss": 0.24076558649539948,
"epoch": 2.2815304676428907,
"grad_norm": 22.143907441363098,
"learning_rate": 1.62577560911024e-07,
"logits": -1.645892858505249,
"logps": -124.22594451904297,
"loss": 0.2325,
"objective": 0.24076558649539948,
"ranking_simple": 0.762499988079071,
"regularize": 0.24076558649539948,
"step": 805,
"wo_beta": 3.4595048427581787
},
{
"dpo_loss": 0.2532159686088562,
"epoch": 2.295701464336325,
"grad_norm": 22.775287720493225,
"learning_rate": 1.565229931334277e-07,
"logits": -1.7806832790374756,
"logps": -121.71713256835938,
"loss": 0.229,
"objective": 0.2532159686088562,
"ranking_simple": 0.8041666746139526,
"regularize": 0.2532159686088562,
"step": 810,
"wo_beta": 2.6662535667419434
},
{
"dpo_loss": 0.21587124466896057,
"epoch": 2.309872461029759,
"grad_norm": 20.40928105754396,
"learning_rate": 1.5056232853991208e-07,
"logits": -1.6817148923873901,
"logps": -123.68048858642578,
"loss": 0.1989,
"objective": 0.21587124466896057,
"ranking_simple": 0.800000011920929,
"regularize": 0.21587124466896057,
"step": 815,
"wo_beta": 2.549884557723999
},
{
"dpo_loss": 0.21313165128231049,
"epoch": 2.324043457723193,
"grad_norm": 24.789515132235113,
"learning_rate": 1.4469719671666043e-07,
"logits": -1.6495434045791626,
"logps": -124.32107543945312,
"loss": 0.2149,
"objective": 0.21313165128231049,
"ranking_simple": 0.8083333373069763,
"regularize": 0.21313165128231049,
"step": 820,
"wo_beta": 2.404456377029419
},
{
"dpo_loss": 0.21146027743816376,
"epoch": 2.3382144544166272,
"grad_norm": 21.660632425555523,
"learning_rate": 1.389292011321498e-07,
"logits": -1.692821979522705,
"logps": -122.83853149414062,
"loss": 0.2168,
"objective": 0.21146027743816376,
"ranking_simple": 0.8166666626930237,
"regularize": 0.21146027743816376,
"step": 825,
"wo_beta": 2.4975247383117676
},
{
"dpo_loss": 0.19700536131858826,
"epoch": 2.3523854511100613,
"grad_norm": 27.701358075237533,
"learning_rate": 1.3325991869878012e-07,
"logits": -1.6795495748519897,
"logps": -126.65140533447266,
"loss": 0.1967,
"objective": 0.19700536131858826,
"ranking_simple": 0.8374999761581421,
"regularize": 0.19700536131858826,
"step": 830,
"wo_beta": 2.146075487136841
},
{
"dpo_loss": 0.21794618666172028,
"epoch": 2.3665564478034957,
"grad_norm": 29.238202346662906,
"learning_rate": 1.2769089934176126e-07,
"logits": -1.647661566734314,
"logps": -125.17839813232422,
"loss": 0.2288,
"objective": 0.21794618666172028,
"ranking_simple": 0.8416666388511658,
"regularize": 0.21794618666172028,
"step": 835,
"wo_beta": 2.8974695205688477
},
{
"dpo_loss": 0.21900226175785065,
"epoch": 2.3807274444969297,
"grad_norm": 24.009821339334245,
"learning_rate": 1.222236655753791e-07,
"logits": -1.6093765497207642,
"logps": -125.18759155273438,
"loss": 0.2184,
"objective": 0.21900226175785065,
"ranking_simple": 0.7833333611488342,
"regularize": 0.21900226175785065,
"step": 840,
"wo_beta": 2.749152898788452
},
{
"dpo_loss": 0.24857226014137268,
"epoch": 2.3948984411903638,
"grad_norm": 27.5079137044431,
"learning_rate": 1.1685971208675538e-07,
"logits": -1.6249310970306396,
"logps": -127.47116088867188,
"loss": 0.2262,
"objective": 0.24857226014137268,
"ranking_simple": 0.7916666865348816,
"regularize": 0.24857226014137268,
"step": 845,
"wo_beta": 2.7568535804748535
},
{
"dpo_loss": 0.2085387259721756,
"epoch": 2.409069437883798,
"grad_norm": 25.878018469347772,
"learning_rate": 1.1160050532721527e-07,
"logits": -1.67723548412323,
"logps": -125.76563262939453,
"loss": 0.2015,
"objective": 0.2085387259721756,
"ranking_simple": 0.7958333492279053,
"regularize": 0.2085387259721756,
"step": 850,
"wo_beta": 2.600330352783203
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 0.8720477223396301,
"eval_logits": -1.80176842212677,
"eval_logps": -133.381103515625,
"eval_loss": 0.8571510910987854,
"eval_objective": 0.8720477223396301,
"eval_ranking_simple": 0.5377846956253052,
"eval_regularize": 0.8720477223396301,
"eval_runtime": 369.6782,
"eval_samples_per_second": 15.662,
"eval_steps_per_second": 1.307,
"eval_wo_beta": 11.025221824645996,
"step": 850
},
{
"dpo_loss": 0.2096593677997589,
"epoch": 2.423240434577232,
"grad_norm": 25.499456648461173,
"learning_rate": 1.0644748311137375e-07,
"logits": -1.6905667781829834,
"logps": -123.53242492675781,
"loss": 0.2195,
"objective": 0.2096593677997589,
"ranking_simple": 0.8458333611488342,
"regularize": 0.2096593677997589,
"step": 855,
"wo_beta": 1.9907649755477905
},
{
"dpo_loss": 0.21979431807994843,
"epoch": 2.4374114312706663,
"grad_norm": 24.848581285581997,
"learning_rate": 1.0140205422405212e-07,
"logits": -1.6141736507415771,
"logps": -127.53914642333984,
"loss": 0.2157,
"objective": 0.21979431807994843,
"ranking_simple": 0.7916666865348816,
"regularize": 0.21979431807994843,
"step": 860,
"wo_beta": 3.0604774951934814
},
{
"dpo_loss": 0.2267284095287323,
"epoch": 2.4515824279641003,
"grad_norm": 26.5384738263413,
"learning_rate": 9.646559803512993e-08,
"logits": -1.6312936544418335,
"logps": -125.63304138183594,
"loss": 0.2262,
"objective": 0.2267284095287323,
"ranking_simple": 0.7708333134651184,
"regularize": 0.2267284095287323,
"step": 865,
"wo_beta": 3.2560784816741943
},
{
"dpo_loss": 0.196714848279953,
"epoch": 2.4657534246575343,
"grad_norm": 22.187392253095393,
"learning_rate": 9.163946412243895e-08,
"logits": -1.6977574825286865,
"logps": -125.96756744384766,
"loss": 0.2037,
"objective": 0.196714848279953,
"ranking_simple": 0.824999988079071,
"regularize": 0.196714848279953,
"step": 870,
"wo_beta": 2.583970308303833
},
{
"dpo_loss": 0.1924598515033722,
"epoch": 2.4799244213509684,
"grad_norm": 25.12579870814084,
"learning_rate": 8.692497190280224e-08,
"logits": -1.5863568782806396,
"logps": -126.04496765136719,
"loss": 0.2038,
"objective": 0.1924598515033722,
"ranking_simple": 0.8583333492279053,
"regularize": 0.1924598515033722,
"step": 875,
"wo_beta": 2.296090602874756
},
{
"dpo_loss": 0.2359813004732132,
"epoch": 2.4940954180444024,
"grad_norm": 23.176269104907163,
"learning_rate": 8.232341027131883e-08,
"logits": -1.5722264051437378,
"logps": -125.54722595214844,
"loss": 0.2224,
"objective": 0.2359813004732132,
"ranking_simple": 0.7666666507720947,
"regularize": 0.2359813004732132,
"step": 880,
"wo_beta": 3.9758193492889404
},
{
"dpo_loss": 0.2542850375175476,
"epoch": 2.5082664147378364,
"grad_norm": 28.503952001031568,
"learning_rate": 7.783603724899257e-08,
"logits": -1.724973201751709,
"logps": -123.99037170410156,
"loss": 0.2326,
"objective": 0.2542850375175476,
"ranking_simple": 0.7958333492279053,
"regularize": 0.2542850375175476,
"step": 885,
"wo_beta": 3.509568929672241
},
{
"dpo_loss": 0.20098893344402313,
"epoch": 2.5224374114312704,
"grad_norm": 24.488143250342084,
"learning_rate": 7.346407963880136e-08,
"logits": -1.595066785812378,
"logps": -123.90083312988281,
"loss": 0.2056,
"objective": 0.20098893344402313,
"ranking_simple": 0.8291666507720947,
"regularize": 0.20098893344402313,
"step": 890,
"wo_beta": 2.0867483615875244
},
{
"dpo_loss": 0.22675950825214386,
"epoch": 2.536608408124705,
"grad_norm": 26.1383864365769,
"learning_rate": 6.92087326903022e-08,
"logits": -1.6463298797607422,
"logps": -126.22193908691406,
"loss": 0.2235,
"objective": 0.22675950825214386,
"ranking_simple": 0.7833333611488342,
"regularize": 0.22675950825214386,
"step": 895,
"wo_beta": 3.0455052852630615
},
{
"dpo_loss": 0.26797404885292053,
"epoch": 2.550779404818139,
"grad_norm": 29.177118299023412,
"learning_rate": 6.507115977286143e-08,
"logits": -1.5577763319015503,
"logps": -123.61392974853516,
"loss": 0.2348,
"objective": 0.26797404885292053,
"ranking_simple": 0.7749999761581421,
"regularize": 0.26797404885292053,
"step": 900,
"wo_beta": 3.1443541049957275
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.8675441145896912,
"eval_logits": -1.8113691806793213,
"eval_logps": -133.67962646484375,
"eval_loss": 0.8529655933380127,
"eval_objective": 0.8675441145896912,
"eval_ranking_simple": 0.5377846956253052,
"eval_regularize": 0.8675441145896912,
"eval_runtime": 369.0633,
"eval_samples_per_second": 15.688,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 10.942305564880371,
"step": 900
},
{
"dpo_loss": 0.20284312963485718,
"epoch": 2.564950401511573,
"grad_norm": 19.950697729864736,
"learning_rate": 6.105249205760127e-08,
"logits": -1.6392539739608765,
"logps": -123.07585906982422,
"loss": 0.2053,
"objective": 0.20284312963485718,
"ranking_simple": 0.8333333134651184,
"regularize": 0.20284312963485718,
"step": 905,
"wo_beta": 2.37481951713562
},
{
"dpo_loss": 0.23266027867794037,
"epoch": 2.579121398205007,
"grad_norm": 30.055735152469072,
"learning_rate": 5.7153828208148846e-08,
"logits": -1.6491973400115967,
"logps": -127.52837371826172,
"loss": 0.217,
"objective": 0.23266027867794037,
"ranking_simple": 0.800000011920929,
"regularize": 0.23266027867794037,
"step": 910,
"wo_beta": 3.36558198928833
},
{
"dpo_loss": 0.23302534222602844,
"epoch": 2.593292394898441,
"grad_norm": 22.890822851586893,
"learning_rate": 5.337623408027292e-08,
"logits": -1.7552512884140015,
"logps": -127.87796783447266,
"loss": 0.2121,
"objective": 0.23302534222602844,
"ranking_simple": 0.8208333253860474,
"regularize": 0.23302534222602844,
"step": 915,
"wo_beta": 2.502667188644409
},
{
"dpo_loss": 0.20162495970726013,
"epoch": 2.6074633915918755,
"grad_norm": 24.20273693189473,
"learning_rate": 4.972074243048896e-08,
"logits": -1.5814868211746216,
"logps": -127.01163482666016,
"loss": 0.2068,
"objective": 0.20162495970726013,
"ranking_simple": 0.8333333134651184,
"regularize": 0.20162495970726013,
"step": 920,
"wo_beta": 1.8146071434020996
},
{
"dpo_loss": 0.21291884779930115,
"epoch": 2.6216343882853095,
"grad_norm": 22.388349767286606,
"learning_rate": 4.6188352633713956e-08,
"logits": -1.5830769538879395,
"logps": -126.23300170898438,
"loss": 0.2195,
"objective": 0.21291884779930115,
"ranking_simple": 0.800000011920929,
"regularize": 0.21291884779930115,
"step": 925,
"wo_beta": 3.1612021923065186
},
{
"dpo_loss": 0.24175626039505005,
"epoch": 2.6358053849787435,
"grad_norm": 21.949168917598172,
"learning_rate": 4.2780030410047796e-08,
"logits": -1.6357704401016235,
"logps": -125.97001647949219,
"loss": 0.2179,
"objective": 0.24175626039505005,
"ranking_simple": 0.7708333134651184,
"regularize": 0.24175626039505005,
"step": 930,
"wo_beta": 3.436295747756958
},
{
"dpo_loss": 0.22544310986995697,
"epoch": 2.6499763816721775,
"grad_norm": 28.54777226492267,
"learning_rate": 3.949670756075446e-08,
"logits": -1.6567264795303345,
"logps": -123.90028381347656,
"loss": 0.2428,
"objective": 0.22544310986995697,
"ranking_simple": 0.8125,
"regularize": 0.22544310986995697,
"step": 935,
"wo_beta": 2.002594470977783
},
{
"dpo_loss": 0.19537684321403503,
"epoch": 2.6641473783656116,
"grad_norm": 27.292491316004774,
"learning_rate": 3.63392817135173e-08,
"logits": -1.6852660179138184,
"logps": -128.0804901123047,
"loss": 0.2163,
"objective": 0.19537684321403503,
"ranking_simple": 0.8083333373069763,
"regularize": 0.19537684321403503,
"step": 940,
"wo_beta": 3.2341361045837402
},
{
"dpo_loss": 0.21889939904212952,
"epoch": 2.678318375059046,
"grad_norm": 23.60505865361295,
"learning_rate": 3.330861607703611e-08,
"logits": -1.7046397924423218,
"logps": -125.53500366210938,
"loss": 0.206,
"objective": 0.21889939904212952,
"ranking_simple": 0.7875000238418579,
"regularize": 0.21889939904212952,
"step": 945,
"wo_beta": 3.0100159645080566
},
{
"dpo_loss": 0.20888622105121613,
"epoch": 2.69248937175248,
"grad_norm": 24.640589732429273,
"learning_rate": 3.040553920503502e-08,
"logits": -1.5867191553115845,
"logps": -123.99597930908203,
"loss": 0.2268,
"objective": 0.20888622105121613,
"ranking_simple": 0.8208333253860474,
"regularize": 0.20888622105121613,
"step": 950,
"wo_beta": 2.38991379737854
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 0.8684250116348267,
"eval_logits": -1.8135783672332764,
"eval_logps": -133.2828826904297,
"eval_loss": 0.8525474667549133,
"eval_objective": 0.8684250116348267,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.8684250116348267,
"eval_runtime": 373.8008,
"eval_samples_per_second": 15.49,
"eval_steps_per_second": 1.292,
"eval_wo_beta": 10.9784517288208,
"step": 950
},
{
"dpo_loss": 0.21907640993595123,
"epoch": 2.706660368445914,
"grad_norm": 25.73877923734523,
"learning_rate": 2.7630844769743756e-08,
"logits": -1.696962833404541,
"logps": -125.86216735839844,
"loss": 0.217,
"objective": 0.21907640993595123,
"ranking_simple": 0.8333333134651184,
"regularize": 0.21907640993595123,
"step": 955,
"wo_beta": 2.471001148223877
},
{
"dpo_loss": 0.24159465730190277,
"epoch": 2.720831365139348,
"grad_norm": 27.398682783569342,
"learning_rate": 2.4985291344915673e-08,
"logits": -1.6559653282165527,
"logps": -125.80078125,
"loss": 0.2243,
"objective": 0.24159465730190277,
"ranking_simple": 0.8041666746139526,
"regularize": 0.24159465730190277,
"step": 960,
"wo_beta": 3.4911081790924072
},
{
"dpo_loss": 0.17974473536014557,
"epoch": 2.735002361832782,
"grad_norm": 26.129978647933363,
"learning_rate": 2.2469602198441573e-08,
"logits": -1.6600605249404907,
"logps": -125.82157897949219,
"loss": 0.2216,
"objective": 0.17974473536014557,
"ranking_simple": 0.8458333611488342,
"regularize": 0.17974473536014557,
"step": 965,
"wo_beta": 1.8301441669464111
},
{
"dpo_loss": 0.2389921396970749,
"epoch": 2.7491733585262166,
"grad_norm": 24.93627247502558,
"learning_rate": 2.008446509461498e-08,
"logits": -1.6814639568328857,
"logps": -127.4894027709961,
"loss": 0.2119,
"objective": 0.2389921396970749,
"ranking_simple": 0.8333333134651184,
"regularize": 0.2389921396970749,
"step": 970,
"wo_beta": 2.071624517440796
},
{
"dpo_loss": 0.21195697784423828,
"epoch": 2.7633443552196506,
"grad_norm": 24.386661793540505,
"learning_rate": 1.7830532106104746e-08,
"logits": -1.6035431623458862,
"logps": -124.46774291992188,
"loss": 0.2154,
"objective": 0.21195697784423828,
"ranking_simple": 0.8333333134651184,
"regularize": 0.21195697784423828,
"step": 975,
"wo_beta": 3.027677059173584
},
{
"dpo_loss": 0.2177601158618927,
"epoch": 2.7775153519130846,
"grad_norm": 30.722672013551914,
"learning_rate": 1.570841943568446e-08,
"logits": -1.7569483518600464,
"logps": -126.10541534423828,
"loss": 0.2135,
"objective": 0.2177601158618927,
"ranking_simple": 0.824999988079071,
"regularize": 0.2177601158618927,
"step": 980,
"wo_beta": 1.981053113937378
},
{
"dpo_loss": 0.21660226583480835,
"epoch": 2.7916863486065187,
"grad_norm": 24.268020278235447,
"learning_rate": 1.3718707247769134e-08,
"logits": -1.5750867128372192,
"logps": -122.41696166992188,
"loss": 0.2142,
"objective": 0.21660226583480835,
"ranking_simple": 0.8208333253860474,
"regularize": 0.21660226583480835,
"step": 985,
"wo_beta": 2.762291431427002
},
{
"dpo_loss": 0.23230423033237457,
"epoch": 2.8058573452999527,
"grad_norm": 25.3957119119687,
"learning_rate": 1.1861939509803686e-08,
"logits": -1.626227617263794,
"logps": -125.03184509277344,
"loss": 0.2309,
"objective": 0.23230423033237457,
"ranking_simple": 0.7916666865348816,
"regularize": 0.23230423033237457,
"step": 990,
"wo_beta": 3.499976396560669
},
{
"dpo_loss": 0.22918492555618286,
"epoch": 2.820028341993387,
"grad_norm": 26.589296858875272,
"learning_rate": 1.0138623843548078e-08,
"logits": -1.691062092781067,
"logps": -126.53192901611328,
"loss": 0.2269,
"objective": 0.22918492555618286,
"ranking_simple": 0.7916666865348816,
"regularize": 0.22918492555618286,
"step": 995,
"wo_beta": 3.2248499393463135
},
{
"dpo_loss": 0.22611786425113678,
"epoch": 2.8341993386868207,
"grad_norm": 23.891964045353923,
"learning_rate": 8.54923138629815e-09,
"logits": -1.6503469944000244,
"logps": -122.57675170898438,
"loss": 0.2198,
"objective": 0.22611786425113678,
"ranking_simple": 0.8333333134651184,
"regularize": 0.22611786425113678,
"step": 1000,
"wo_beta": 2.427152395248413
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.8652148246765137,
"eval_logits": -1.8167296648025513,
"eval_logps": -132.88087463378906,
"eval_loss": 0.8492961525917053,
"eval_objective": 0.8652148246765137,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 0.8652148246765137,
"eval_runtime": 368.2335,
"eval_samples_per_second": 15.724,
"eval_steps_per_second": 1.312,
"eval_wo_beta": 10.938254356384277,
"step": 1000
},
{
"dpo_loss": 0.24608786404132843,
"epoch": 2.848370335380255,
"grad_norm": 27.602968631540815,
"learning_rate": 7.09419666208183e-09,
"logits": -1.6333565711975098,
"logps": -123.5007553100586,
"loss": 0.2273,
"objective": 0.24608786404132843,
"ranking_simple": 0.7916666865348816,
"regularize": 0.24608786404132843,
"step": 1005,
"wo_beta": 3.1694223880767822
},
{
"dpo_loss": 0.20009997487068176,
"epoch": 2.862541332073689,
"grad_norm": 25.090299269603783,
"learning_rate": 5.773917462864264e-09,
"logits": -1.6704093217849731,
"logps": -125.71635437011719,
"loss": 0.2037,
"objective": 0.20009997487068176,
"ranking_simple": 0.800000011920929,
"regularize": 0.20009997487068176,
"step": 1010,
"wo_beta": 2.577218532562256
},
{
"dpo_loss": 0.21036744117736816,
"epoch": 2.8767123287671232,
"grad_norm": 28.329606007932064,
"learning_rate": 4.588754739795586e-09,
"logits": -1.6348304748535156,
"logps": -122.55280303955078,
"loss": 0.2113,
"objective": 0.21036744117736816,
"ranking_simple": 0.8083333373069763,
"regularize": 0.21036744117736816,
"step": 1015,
"wo_beta": 2.199587345123291
},
{
"dpo_loss": 0.21461248397827148,
"epoch": 2.8908833254605573,
"grad_norm": 24.62375043047653,
"learning_rate": 3.53903250453047e-09,
"logits": -1.5652154684066772,
"logps": -125.25798797607422,
"loss": 0.2252,
"objective": 0.21461248397827148,
"ranking_simple": 0.7708333134651184,
"regularize": 0.21461248397827148,
"step": 1020,
"wo_beta": 3.251589059829712
},
{
"dpo_loss": 0.23392058908939362,
"epoch": 2.9050543221539913,
"grad_norm": 25.1548357787363,
"learning_rate": 2.6250377406467627e-09,
"logits": -1.6649322509765625,
"logps": -124.19398498535156,
"loss": 0.2291,
"objective": 0.23392058908939362,
"ranking_simple": 0.8083333373069763,
"regularize": 0.23392058908939362,
"step": 1025,
"wo_beta": 2.934882879257202
},
{
"dpo_loss": 0.19529716670513153,
"epoch": 2.9192253188474258,
"grad_norm": 23.978894569817292,
"learning_rate": 1.8470203251865768e-09,
"logits": -1.7404934167861938,
"logps": -123.60317993164062,
"loss": 0.2177,
"objective": 0.19529716670513153,
"ranking_simple": 0.7666666507720947,
"regularize": 0.19529716670513153,
"step": 1030,
"wo_beta": 2.584693670272827
},
{
"dpo_loss": 0.19891007244586945,
"epoch": 2.9333963155408598,
"grad_norm": 22.489637587674896,
"learning_rate": 1.2051929603428823e-09,
"logits": -1.6503539085388184,
"logps": -126.50735473632812,
"loss": 0.2124,
"objective": 0.19891007244586945,
"ranking_simple": 0.8125,
"regularize": 0.19891007244586945,
"step": 1035,
"wo_beta": 2.176048755645752
},
{
"dpo_loss": 0.21993538737297058,
"epoch": 2.947567312234294,
"grad_norm": 23.591827890954175,
"learning_rate": 6.997311153086882e-10,
"logits": -1.6743519306182861,
"logps": -125.27057647705078,
"loss": 0.2151,
"objective": 0.21993538737297058,
"ranking_simple": 0.800000011920929,
"regularize": 0.21993538737297058,
"step": 1040,
"wo_beta": 3.0266594886779785
},
{
"dpo_loss": 0.22178266942501068,
"epoch": 2.961738308927728,
"grad_norm": 26.41640596804321,
"learning_rate": 3.3077297830541585e-10,
"logits": -1.6230467557907104,
"logps": -128.82781982421875,
"loss": 0.2256,
"objective": 0.22178266942501068,
"ranking_simple": 0.8041666746139526,
"regularize": 0.22178266942501068,
"step": 1045,
"wo_beta": 3.258082866668701
},
{
"dpo_loss": 0.21878859400749207,
"epoch": 2.975909305621162,
"grad_norm": 25.001515767941786,
"learning_rate": 9.841941880361914e-11,
"logits": -1.6594524383544922,
"logps": -121.61707305908203,
"loss": 0.2221,
"objective": 0.21878859400749207,
"ranking_simple": 0.7958333492279053,
"regularize": 0.21878859400749207,
"step": 1050,
"wo_beta": 2.6373982429504395
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 0.8653033375740051,
"eval_logits": -1.816504955291748,
"eval_logps": -132.85667419433594,
"eval_loss": 0.8493290543556213,
"eval_objective": 0.8653033375740051,
"eval_ranking_simple": 0.534679114818573,
"eval_regularize": 0.8653033375740051,
"eval_runtime": 369.7292,
"eval_samples_per_second": 15.66,
"eval_steps_per_second": 1.306,
"eval_wo_beta": 10.94184398651123,
"step": 1050
},
{
"dpo_loss": 0.22742925584316254,
"epoch": 2.9900803023145963,
"grad_norm": 25.264967316936268,
"learning_rate": 2.7339599464326622e-12,
"logits": -1.6433926820755005,
"logps": -123.5830078125,
"loss": 0.2221,
"objective": 0.22742925584316254,
"ranking_simple": 0.800000011920929,
"regularize": 0.22742925584316254,
"step": 1055,
"wo_beta": 2.7176921367645264
},
{
"epoch": 2.992914501653283,
"step": 1056,
"total_flos": 0.0,
"train_loss": 0.37872021553586377,
"train_runtime": 34657.2213,
"train_samples_per_second": 4.398,
"train_steps_per_second": 0.03
}
],
"logging_steps": 5,
"max_steps": 1056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}