{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.992914501653283, "eval_steps": 50, "global_step": 1056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 18.39703870091124, "learning_rate": 9.433962264150943e-09, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.2708282470703125 }, { "dpo_loss": 0.6930365562438965, "epoch": 0.014170996693434105, "grad_norm": 18.759209504732585, "learning_rate": 4.7169811320754715e-08, "logits": -1.429247498512268, "logps": -83.84073638916016, "loss": 0.6931, "objective": 0.6930365562438965, "ranking_simple": 0.4895833432674408, "regularize": 0.6930365562438965, "step": 5, "wo_beta": 7.670312404632568 }, { "dpo_loss": 0.6933786869049072, "epoch": 0.02834199338686821, "grad_norm": 20.439115048712264, "learning_rate": 9.433962264150943e-08, "logits": -1.40127694606781, "logps": -84.69605255126953, "loss": 0.6932, "objective": 0.6933786869049072, "ranking_simple": 0.5791666507720947, "regularize": 0.6933786869049072, "step": 10, "wo_beta": 5.937962055206299 }, { "dpo_loss": 0.6909440755844116, "epoch": 0.042512990080302314, "grad_norm": 18.777743501771415, "learning_rate": 1.4150943396226414e-07, "logits": -1.5383602380752563, "logps": -84.21109771728516, "loss": 0.6921, "objective": 0.6909440755844116, "ranking_simple": 0.5666666626930237, "regularize": 0.6909440755844116, "step": 15, "wo_beta": 6.64866828918457 }, { "dpo_loss": 0.6903365254402161, "epoch": 0.05668398677373642, "grad_norm": 18.03474083688221, "learning_rate": 1.8867924528301886e-07, "logits": -1.3631871938705444, "logps": -83.19156646728516, "loss": 0.6904, "objective": 0.6903365254402161, "ranking_simple": 0.48750001192092896, "regularize": 0.6903365254402161, "step": 20, "wo_beta": 8.294000625610352 }, { "dpo_loss": 0.6920856237411499, "epoch": 0.07085498346717052, "grad_norm": 17.418659582335064, "learning_rate": 2.3584905660377358e-07, "logits": -1.3736237287521362, "logps": -83.09373474121094, "loss": 0.6904, "objective": 0.6920856237411499, "ranking_simple": 0.5666666626930237, "regularize": 0.6920856237411499, "step": 25, "wo_beta": 6.352960109710693 }, { "dpo_loss": 0.686029851436615, "epoch": 0.08502598016060463, "grad_norm": 16.102325248569215, "learning_rate": 2.830188679245283e-07, "logits": -1.4392812252044678, "logps": -82.93743133544922, "loss": 0.6866, "objective": 0.686029851436615, "ranking_simple": 0.48750001192092896, "regularize": 0.686029851436615, "step": 30, "wo_beta": 8.934405326843262 }, { "dpo_loss": 0.692076563835144, "epoch": 0.09919697685403873, "grad_norm": 17.863783183076404, "learning_rate": 3.30188679245283e-07, "logits": -1.4204154014587402, "logps": -82.21839904785156, "loss": 0.6871, "objective": 0.692076563835144, "ranking_simple": 0.5333333611488342, "regularize": 0.692076563835144, "step": 35, "wo_beta": 6.40147590637207 }, { "dpo_loss": 0.68491530418396, "epoch": 0.11336797354747284, "grad_norm": 19.094434967317397, "learning_rate": 3.773584905660377e-07, "logits": -1.4201196432113647, "logps": -82.76591491699219, "loss": 0.6809, "objective": 0.68491530418396, "ranking_simple": 0.5166666507720947, "regularize": 0.68491530418396, "step": 40, "wo_beta": 6.802278518676758 }, { "dpo_loss": 0.681048572063446, "epoch": 0.12753897024090693, "grad_norm": 19.32762735250478, "learning_rate": 4.2452830188679244e-07, "logits": -1.448870301246643, "logps": -82.9134292602539, "loss": 0.6834, "objective": 0.681048572063446, "ranking_simple": 0.5249999761581421, "regularize": 0.681048572063446, "step": 45, "wo_beta": 6.972365856170654 }, { "dpo_loss": 0.676356852054596, "epoch": 0.14170996693434104, "grad_norm": 18.06878353401125, "learning_rate": 4.7169811320754717e-07, "logits": -1.4718233346939087, "logps": -83.58888244628906, "loss": 0.6719, "objective": 0.676356852054596, "ranking_simple": 0.4958333373069763, "regularize": 0.676356852054596, "step": 50, "wo_beta": 7.712605953216553 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6879124045372009, "eval_logits": -1.4697412252426147, "eval_logps": -89.67761993408203, "eval_loss": 0.685627281665802, "eval_objective": 0.6879124045372009, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.6879124045372009, "eval_runtime": 368.3427, "eval_samples_per_second": 15.719, "eval_steps_per_second": 1.311, "eval_wo_beta": 7.922134876251221, "step": 50 }, { "dpo_loss": 0.6702864766120911, "epoch": 0.15588096362777515, "grad_norm": 19.335503669780774, "learning_rate": 5.188679245283019e-07, "logits": -1.4462206363677979, "logps": -83.2486801147461, "loss": 0.6744, "objective": 0.6702864766120911, "ranking_simple": 0.4958333373069763, "regularize": 0.6702864766120911, "step": 55, "wo_beta": 7.6053338050842285 }, { "dpo_loss": 0.6743167638778687, "epoch": 0.17005196032120926, "grad_norm": 19.32608740622528, "learning_rate": 5.660377358490566e-07, "logits": -1.3781672716140747, "logps": -84.30441284179688, "loss": 0.6703, "objective": 0.6743167638778687, "ranking_simple": 0.5375000238418579, "regularize": 0.6743167638778687, "step": 60, "wo_beta": 6.470597267150879 }, { "dpo_loss": 0.6568139791488647, "epoch": 0.18422295701464336, "grad_norm": 21.002392018495687, "learning_rate": 6.132075471698112e-07, "logits": -1.4774647951126099, "logps": -81.55781555175781, "loss": 0.6619, "objective": 0.6568139791488647, "ranking_simple": 0.5375000238418579, "regularize": 0.6568139791488647, "step": 65, "wo_beta": 6.502650260925293 }, { "dpo_loss": 0.6504150629043579, "epoch": 0.19839395370807747, "grad_norm": 18.63585310886347, "learning_rate": 6.60377358490566e-07, "logits": -1.5209298133850098, "logps": -84.6080551147461, "loss": 0.6501, "objective": 0.6504150629043579, "ranking_simple": 0.5625, "regularize": 0.6504150629043579, "step": 70, "wo_beta": 6.957874774932861 }, { "dpo_loss": 0.653372049331665, "epoch": 0.21256495040151158, "grad_norm": 20.115959212821757, "learning_rate": 7.075471698113207e-07, "logits": -1.4559004306793213, "logps": -85.126953125, "loss": 0.6557, "objective": 0.653372049331665, "ranking_simple": 0.5458333492279053, "regularize": 0.653372049331665, "step": 75, "wo_beta": 6.480815410614014 }, { "dpo_loss": 0.6524909734725952, "epoch": 0.22673594709494568, "grad_norm": 23.881541586489003, "learning_rate": 7.547169811320754e-07, "logits": -1.567487120628357, "logps": -84.74055480957031, "loss": 0.6601, "objective": 0.6524909734725952, "ranking_simple": 0.5666666626930237, "regularize": 0.6524909734725952, "step": 80, "wo_beta": 6.057122707366943 }, { "dpo_loss": 0.6443823575973511, "epoch": 0.2409069437883798, "grad_norm": 18.676003369955335, "learning_rate": 8.018867924528302e-07, "logits": -1.5786373615264893, "logps": -84.9271011352539, "loss": 0.6489, "objective": 0.6443823575973511, "ranking_simple": 0.5666666626930237, "regularize": 0.6443823575973511, "step": 85, "wo_beta": 5.872949600219727 }, { "dpo_loss": 0.6308037638664246, "epoch": 0.25507794048181387, "grad_norm": 18.36712876918143, "learning_rate": 8.490566037735849e-07, "logits": -1.693374752998352, "logps": -85.01197052001953, "loss": 0.6429, "objective": 0.6308037638664246, "ranking_simple": 0.5458333492279053, "regularize": 0.6308037638664246, "step": 90, "wo_beta": 6.442193031311035 }, { "dpo_loss": 0.6293771266937256, "epoch": 0.269248937175248, "grad_norm": 19.143294691787162, "learning_rate": 8.962264150943396e-07, "logits": -1.6244534254074097, "logps": -84.94630432128906, "loss": 0.6392, "objective": 0.6293771266937256, "ranking_simple": 0.5208333134651184, "regularize": 0.6293771266937256, "step": 95, "wo_beta": 7.510855197906494 }, { "dpo_loss": 0.6378400921821594, "epoch": 0.2834199338686821, "grad_norm": 18.923136932397636, "learning_rate": 9.433962264150943e-07, "logits": -1.531855821609497, "logps": -84.18727111816406, "loss": 0.6459, "objective": 0.6378400921821594, "ranking_simple": 0.5291666388511658, "regularize": 0.6378400921821594, "step": 100, "wo_beta": 6.881345748901367 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6793138384819031, "eval_logits": -1.6510542631149292, "eval_logps": -92.99544525146484, "eval_loss": 0.6764773726463318, "eval_objective": 0.6793138384819031, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.6793138384819031, "eval_runtime": 367.8383, "eval_samples_per_second": 15.741, "eval_steps_per_second": 1.313, "eval_wo_beta": 7.872693061828613, "step": 100 }, { "dpo_loss": 0.6514045000076294, "epoch": 0.2975909305621162, "grad_norm": 24.32830193794003, "learning_rate": 9.90566037735849e-07, "logits": -1.7460098266601562, "logps": -88.28630828857422, "loss": 0.6307, "objective": 0.6514045000076294, "ranking_simple": 0.574999988079071, "regularize": 0.6514045000076294, "step": 105, "wo_beta": 6.015078067779541 }, { "dpo_loss": 0.6186487674713135, "epoch": 0.3117619272555503, "grad_norm": 21.425599904477778, "learning_rate": 9.99956257238817e-07, "logits": -1.6647683382034302, "logps": -88.7564468383789, "loss": 0.619, "objective": 0.6186487674713135, "ranking_simple": 0.5249999761581421, "regularize": 0.6186487674713135, "step": 110, "wo_beta": 6.501535892486572 }, { "dpo_loss": 0.6382968425750732, "epoch": 0.32593292394898443, "grad_norm": 23.57030350866866, "learning_rate": 9.997785653888834e-07, "logits": -1.6553956270217896, "logps": -88.90452575683594, "loss": 0.6244, "objective": 0.6382968425750732, "ranking_simple": 0.5666666626930237, "regularize": 0.6382968425750732, "step": 115, "wo_beta": 6.804749488830566 }, { "dpo_loss": 0.613271176815033, "epoch": 0.3401039206424185, "grad_norm": 17.498249311505813, "learning_rate": 9.994642390694308e-07, "logits": -1.6392256021499634, "logps": -87.69567108154297, "loss": 0.6212, "objective": 0.613271176815033, "ranking_simple": 0.574999988079071, "regularize": 0.613271176815033, "step": 120, "wo_beta": 7.195651054382324 }, { "dpo_loss": 0.6102784872055054, "epoch": 0.35427491733585265, "grad_norm": 17.38350046383242, "learning_rate": 9.990133642141357e-07, "logits": -1.6804019212722778, "logps": -89.06767272949219, "loss": 0.6218, "objective": 0.6102784872055054, "ranking_simple": 0.5208333134651184, "regularize": 0.6102784872055054, "step": 125, "wo_beta": 7.266691207885742 }, { "dpo_loss": 0.6158983111381531, "epoch": 0.3684459140292867, "grad_norm": 18.419918693556507, "learning_rate": 9.98426064087682e-07, "logits": -1.6602026224136353, "logps": -86.5518569946289, "loss": 0.6238, "objective": 0.6158983111381531, "ranking_simple": 0.5833333134651184, "regularize": 0.6158983111381531, "step": 130, "wo_beta": 5.715666770935059 }, { "dpo_loss": 0.6510148644447327, "epoch": 0.3826169107227208, "grad_norm": 21.943398704973582, "learning_rate": 9.977024992520601e-07, "logits": -1.7066783905029297, "logps": -88.5932846069336, "loss": 0.6289, "objective": 0.6510148644447327, "ranking_simple": 0.5166666507720947, "regularize": 0.6510148644447327, "step": 135, "wo_beta": 7.166342258453369 }, { "dpo_loss": 0.6169579029083252, "epoch": 0.39678790741615494, "grad_norm": 19.9956879065625, "learning_rate": 9.968428675226713e-07, "logits": -1.6791799068450928, "logps": -89.6558837890625, "loss": 0.6144, "objective": 0.6169579029083252, "ranking_simple": 0.5833333134651184, "regularize": 0.6169579029083252, "step": 140, "wo_beta": 6.211281776428223 }, { "dpo_loss": 0.5698094964027405, "epoch": 0.410958904109589, "grad_norm": 20.71491947708098, "learning_rate": 9.958474039142469e-07, "logits": -1.6847442388534546, "logps": -89.56360626220703, "loss": 0.5875, "objective": 0.5698094964027405, "ranking_simple": 0.6499999761581421, "regularize": 0.5698094964027405, "step": 145, "wo_beta": 5.130297660827637 }, { "dpo_loss": 0.6153029203414917, "epoch": 0.42512990080302315, "grad_norm": 19.57556543746145, "learning_rate": 9.947163805765979e-07, "logits": -1.70354425907135, "logps": -89.73717498779297, "loss": 0.5993, "objective": 0.6153029203414917, "ranking_simple": 0.550000011920929, "regularize": 0.6153029203414917, "step": 150, "wo_beta": 8.4924955368042 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6804503202438354, "eval_logits": -1.6963125467300415, "eval_logps": -95.27294921875, "eval_loss": 0.6770597696304321, "eval_objective": 0.6804503202438354, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.6804503202438354, "eval_runtime": 370.2983, "eval_samples_per_second": 15.636, "eval_steps_per_second": 1.304, "eval_wo_beta": 8.2155179977417, "step": 150 }, { "dpo_loss": 0.59377521276474, "epoch": 0.43930089749645723, "grad_norm": 23.61683273964934, "learning_rate": 9.934501067202117e-07, "logits": -1.7478511333465576, "logps": -87.8529052734375, "loss": 0.5935, "objective": 0.59377521276474, "ranking_simple": 0.5375000238418579, "regularize": 0.59377521276474, "step": 155, "wo_beta": 7.3897318840026855 }, { "dpo_loss": 0.589878261089325, "epoch": 0.45347189418989137, "grad_norm": 19.912032474590337, "learning_rate": 9.92048928531717e-07, "logits": -1.6176892518997192, "logps": -88.71855926513672, "loss": 0.5812, "objective": 0.589878261089325, "ranking_simple": 0.5874999761581421, "regularize": 0.589878261089325, "step": 160, "wo_beta": 6.206328868865967 }, { "dpo_loss": 0.5920292735099792, "epoch": 0.46764289088332545, "grad_norm": 21.374874744599914, "learning_rate": 9.905132290792392e-07, "logits": -1.745171308517456, "logps": -91.21366119384766, "loss": 0.5859, "objective": 0.5920292735099792, "ranking_simple": 0.5416666865348816, "regularize": 0.5920292735099792, "step": 165, "wo_beta": 6.208406448364258 }, { "dpo_loss": 0.6067599654197693, "epoch": 0.4818138875767596, "grad_norm": 19.998053262503415, "learning_rate": 9.888434282076757e-07, "logits": -1.8083672523498535, "logps": -91.927734375, "loss": 0.5862, "objective": 0.6067599654197693, "ranking_simple": 0.5375000238418579, "regularize": 0.6067599654197693, "step": 170, "wo_beta": 7.094420433044434 }, { "dpo_loss": 0.5829775929450989, "epoch": 0.49598488427019366, "grad_norm": 22.779470361992754, "learning_rate": 9.870399824239114e-07, "logits": -1.6424000263214111, "logps": -91.81340789794922, "loss": 0.5844, "objective": 0.5829775929450989, "ranking_simple": 0.5708333253860474, "regularize": 0.5829775929450989, "step": 175, "wo_beta": 6.69417667388916 }, { "dpo_loss": 0.5724970698356628, "epoch": 0.5101558809636277, "grad_norm": 19.56386680488567, "learning_rate": 9.851033847720164e-07, "logits": -1.6553891897201538, "logps": -91.84220123291016, "loss": 0.5461, "objective": 0.5724970698356628, "ranking_simple": 0.5375000238418579, "regularize": 0.5724970698356628, "step": 180, "wo_beta": 6.825948238372803 }, { "dpo_loss": 0.5537912845611572, "epoch": 0.5243268776570619, "grad_norm": 23.895057754013074, "learning_rate": 9.83034164698452e-07, "logits": -1.6311272382736206, "logps": -90.77505493164062, "loss": 0.5628, "objective": 0.5537912845611572, "ranking_simple": 0.5791666507720947, "regularize": 0.5537912845611572, "step": 185, "wo_beta": 5.920953750610352 }, { "dpo_loss": 0.5373654961585999, "epoch": 0.538497874350496, "grad_norm": 23.921385246249166, "learning_rate": 9.808328879073251e-07, "logits": -1.727196216583252, "logps": -92.44843292236328, "loss": 0.5475, "objective": 0.5373654961585999, "ranking_simple": 0.6291666626930237, "regularize": 0.5373654961585999, "step": 190, "wo_beta": 5.952234745025635 }, { "dpo_loss": 0.5564671754837036, "epoch": 0.5526688710439301, "grad_norm": 24.71069656183637, "learning_rate": 9.78500156205731e-07, "logits": -1.7691571712493896, "logps": -102.49636840820312, "loss": 0.5838, "objective": 0.5564671754837036, "ranking_simple": 0.6041666865348816, "regularize": 0.5564671754837036, "step": 195, "wo_beta": 6.274513244628906 }, { "dpo_loss": 0.5659002661705017, "epoch": 0.5668398677373642, "grad_norm": 18.721765242615913, "learning_rate": 9.760366073392244e-07, "logits": -1.7705143690109253, "logps": -108.12271118164062, "loss": 0.5557, "objective": 0.5659002661705017, "ranking_simple": 0.5583333373069763, "regularize": 0.5659002661705017, "step": 200, "wo_beta": 6.690428733825684 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6866137981414795, "eval_logits": -1.8150068521499634, "eval_logps": -115.46802520751953, "eval_loss": 0.6857941150665283, "eval_objective": 0.6866137981414795, "eval_ranking_simple": 0.5295031070709229, "eval_regularize": 0.6866137981414795, "eval_runtime": 369.0235, "eval_samples_per_second": 15.69, "eval_steps_per_second": 1.309, "eval_wo_beta": 7.96071720123291, "step": 200 }, { "dpo_loss": 0.5478584170341492, "epoch": 0.5810108644307983, "grad_norm": 21.07102836538459, "learning_rate": 9.734429148174674e-07, "logits": -1.6568210124969482, "logps": -108.47331237792969, "loss": 0.545, "objective": 0.5478584170341492, "ranking_simple": 0.5874999761581421, "regularize": 0.5478584170341492, "step": 205, "wo_beta": 7.015623092651367 }, { "dpo_loss": 0.5228937268257141, "epoch": 0.5951818611242324, "grad_norm": 20.651776585733355, "learning_rate": 9.707197877300973e-07, "logits": -1.7239099740982056, "logps": -108.58413696289062, "loss": 0.5299, "objective": 0.5228937268257141, "ranking_simple": 0.625, "regularize": 0.5228937268257141, "step": 210, "wo_beta": 5.675539970397949 }, { "dpo_loss": 0.5450037121772766, "epoch": 0.6093528578176665, "grad_norm": 23.006204591176342, "learning_rate": 9.678679705528698e-07, "logits": -1.818426251411438, "logps": -108.85588073730469, "loss": 0.5504, "objective": 0.5450037121772766, "ranking_simple": 0.5625, "regularize": 0.5450037121772766, "step": 215, "wo_beta": 6.871977806091309 }, { "dpo_loss": 0.5016953945159912, "epoch": 0.6235238545111006, "grad_norm": 21.55242582954206, "learning_rate": 9.648882429441256e-07, "logits": -1.796720266342163, "logps": -106.0498046875, "loss": 0.5337, "objective": 0.5016953945159912, "ranking_simple": 0.5958333611488342, "regularize": 0.5016953945159912, "step": 220, "wo_beta": 6.975856304168701 }, { "dpo_loss": 0.5535920262336731, "epoch": 0.6376948512045347, "grad_norm": 18.336769425586553, "learning_rate": 9.61781419531641e-07, "logits": -1.8808425664901733, "logps": -103.36882781982422, "loss": 0.5444, "objective": 0.5535920262336731, "ranking_simple": 0.5833333134651184, "regularize": 0.5535920262336731, "step": 225, "wo_beta": 6.945814609527588 }, { "dpo_loss": 0.5067029595375061, "epoch": 0.6518658478979689, "grad_norm": 20.395537847593562, "learning_rate": 9.585483496899149e-07, "logits": -1.781941533088684, "logps": -102.78409576416016, "loss": 0.5275, "objective": 0.5067029595375061, "ranking_simple": 0.6541666388511658, "regularize": 0.5067029595375061, "step": 230, "wo_beta": 4.95630407333374 }, { "dpo_loss": 0.5263319611549377, "epoch": 0.6660368445914029, "grad_norm": 24.579908080459226, "learning_rate": 9.551899173079606e-07, "logits": -1.7018815279006958, "logps": -107.19641876220703, "loss": 0.5235, "objective": 0.5263319611549377, "ranking_simple": 0.5958333611488342, "regularize": 0.5263319611549377, "step": 235, "wo_beta": 6.136026859283447 }, { "dpo_loss": 0.5419639945030212, "epoch": 0.680207841284837, "grad_norm": 25.457069150013837, "learning_rate": 9.517070405476574e-07, "logits": -1.8716365098953247, "logps": -109.35581970214844, "loss": 0.5391, "objective": 0.5419639945030212, "ranking_simple": 0.637499988079071, "regularize": 0.5419639945030212, "step": 240, "wo_beta": 7.386639595031738 }, { "dpo_loss": 0.599401593208313, "epoch": 0.6943788379782712, "grad_norm": 20.952521291073488, "learning_rate": 9.481006715927351e-07, "logits": -1.8737353086471558, "logps": -105.67475128173828, "loss": 0.5586, "objective": 0.599401593208313, "ranking_simple": 0.625, "regularize": 0.599401593208313, "step": 245, "wo_beta": 6.23760986328125 }, { "dpo_loss": 0.5379212498664856, "epoch": 0.7085498346717053, "grad_norm": 17.076599406497994, "learning_rate": 9.443717963884568e-07, "logits": -1.6231579780578613, "logps": -97.79161834716797, "loss": 0.5428, "objective": 0.5379212498664856, "ranking_simple": 0.6083333492279053, "regularize": 0.5379212498664856, "step": 250, "wo_beta": 5.470564842224121 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.674086332321167, "eval_logits": -1.8494712114334106, "eval_logps": -102.56678771972656, "eval_loss": 0.6744823455810547, "eval_objective": 0.674086332321167, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 0.674086332321167, "eval_runtime": 369.2355, "eval_samples_per_second": 15.681, "eval_steps_per_second": 1.308, "eval_wo_beta": 7.989133358001709, "step": 250 }, { "dpo_loss": 0.4980238378047943, "epoch": 0.7227208313651393, "grad_norm": 18.645503031645394, "learning_rate": 9.405214343720706e-07, "logits": -1.8677008152008057, "logps": -94.05529022216797, "loss": 0.5176, "objective": 0.4980238378047943, "ranking_simple": 0.5833333134651184, "regularize": 0.4980238378047943, "step": 255, "wo_beta": 5.837521553039551 }, { "dpo_loss": 0.5208728909492493, "epoch": 0.7368918280585735, "grad_norm": 22.040310265005534, "learning_rate": 9.365506381941065e-07, "logits": -1.8145065307617188, "logps": -96.94599914550781, "loss": 0.5381, "objective": 0.5208728909492493, "ranking_simple": 0.5708333253860474, "regularize": 0.5208728909492493, "step": 260, "wo_beta": 6.736252784729004 }, { "dpo_loss": 0.4980500638484955, "epoch": 0.7510628247520076, "grad_norm": 23.39044199948042, "learning_rate": 9.32460493430591e-07, "logits": -1.7500866651535034, "logps": -96.90016174316406, "loss": 0.5023, "objective": 0.4980500638484955, "ranking_simple": 0.6583333611488342, "regularize": 0.4980500638484955, "step": 265, "wo_beta": 6.221852779388428 }, { "dpo_loss": 0.5561876893043518, "epoch": 0.7652338214454416, "grad_norm": 20.73702481860498, "learning_rate": 9.282521182862629e-07, "logits": -1.8606762886047363, "logps": -96.93506622314453, "loss": 0.5259, "objective": 0.5561876893043518, "ranking_simple": 0.550000011920929, "regularize": 0.5561876893043518, "step": 270, "wo_beta": 6.527937412261963 }, { "dpo_loss": 0.5460684299468994, "epoch": 0.7794048181388757, "grad_norm": 19.98766091687606, "learning_rate": 9.239266632888658e-07, "logits": -1.697192668914795, "logps": -96.09431457519531, "loss": 0.5114, "objective": 0.5460684299468994, "ranking_simple": 0.574999988079071, "regularize": 0.5460684299468994, "step": 275, "wo_beta": 6.768658638000488 }, { "dpo_loss": 0.47327375411987305, "epoch": 0.7935758148323099, "grad_norm": 21.02838944330306, "learning_rate": 9.194853109746072e-07, "logits": -1.8069101572036743, "logps": -97.56507873535156, "loss": 0.491, "objective": 0.47327375411987305, "ranking_simple": 0.625, "regularize": 0.47327375411987305, "step": 280, "wo_beta": 5.885926723480225 }, { "dpo_loss": 0.5277642607688904, "epoch": 0.807746811525744, "grad_norm": 22.871584835681148, "learning_rate": 9.14929275564863e-07, "logits": -1.8146883249282837, "logps": -99.09355926513672, "loss": 0.5087, "objective": 0.5277642607688904, "ranking_simple": 0.5791666507720947, "regularize": 0.5277642607688904, "step": 285, "wo_beta": 7.316993236541748 }, { "dpo_loss": 0.43708011507987976, "epoch": 0.821917808219178, "grad_norm": 25.529916734053177, "learning_rate": 9.102598026342222e-07, "logits": -1.8029001951217651, "logps": -96.13628387451172, "loss": 0.4688, "objective": 0.43708011507987976, "ranking_simple": 0.625, "regularize": 0.43708011507987976, "step": 290, "wo_beta": 4.752199649810791 }, { "dpo_loss": 0.49055105447769165, "epoch": 0.8360888049126122, "grad_norm": 26.014797277152265, "learning_rate": 9.0547816876996e-07, "logits": -1.8255597352981567, "logps": -96.79292297363281, "loss": 0.4938, "objective": 0.49055105447769165, "ranking_simple": 0.5708333253860474, "regularize": 0.49055105447769165, "step": 295, "wo_beta": 6.931264400482178 }, { "dpo_loss": 0.4934200048446655, "epoch": 0.8502598016060463, "grad_norm": 22.235912832826426, "learning_rate": 9.005856812230304e-07, "logits": -1.8234219551086426, "logps": -98.131103515625, "loss": 0.4987, "objective": 0.4934200048446655, "ranking_simple": 0.6541666388511658, "regularize": 0.4934200048446655, "step": 300, "wo_beta": 4.583770275115967 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.7202683091163635, "eval_logits": -1.9276690483093262, "eval_logps": -110.09490966796875, "eval_loss": 0.711872398853302, "eval_objective": 0.7202683091163635, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 0.7202683091163635, "eval_runtime": 369.1999, "eval_samples_per_second": 15.683, "eval_steps_per_second": 1.308, "eval_wo_beta": 8.926669120788574, "step": 300 }, { "dpo_loss": 0.5209147334098816, "epoch": 0.8644307982994804, "grad_norm": 21.742768863559323, "learning_rate": 8.955836775506775e-07, "logits": -1.8012293577194214, "logps": -102.54885864257812, "loss": 0.4739, "objective": 0.5209147334098816, "ranking_simple": 0.6333333253860474, "regularize": 0.5209147334098816, "step": 305, "wo_beta": 5.649385929107666 }, { "dpo_loss": 0.4898562729358673, "epoch": 0.8786017949929145, "grad_norm": 22.266286975377888, "learning_rate": 8.904735252507609e-07, "logits": -1.8041430711746216, "logps": -101.15433502197266, "loss": 0.488, "objective": 0.4898562729358673, "ranking_simple": 0.6499999761581421, "regularize": 0.4898562729358673, "step": 310, "wo_beta": 6.073896408081055 }, { "dpo_loss": 0.4698447585105896, "epoch": 0.8927727916863486, "grad_norm": 22.62826678390999, "learning_rate": 8.852566213878946e-07, "logits": -1.798747181892395, "logps": -101.55271911621094, "loss": 0.4839, "objective": 0.4698447585105896, "ranking_simple": 0.6083333492279053, "regularize": 0.4698447585105896, "step": 315, "wo_beta": 5.962237358093262 }, { "dpo_loss": 0.4936090409755707, "epoch": 0.9069437883797827, "grad_norm": 25.39576707145419, "learning_rate": 8.799343922115043e-07, "logits": -1.7406024932861328, "logps": -110.01341247558594, "loss": 0.5003, "objective": 0.4936090409755707, "ranking_simple": 0.6666666865348816, "regularize": 0.4936090409755707, "step": 320, "wo_beta": 4.6922502517700195 }, { "dpo_loss": 0.464478075504303, "epoch": 0.9211147850732169, "grad_norm": 22.63000678324253, "learning_rate": 8.745082927659046e-07, "logits": -1.8573758602142334, "logps": -106.48271179199219, "loss": 0.4601, "objective": 0.464478075504303, "ranking_simple": 0.637499988079071, "regularize": 0.464478075504303, "step": 325, "wo_beta": 5.424474239349365 }, { "dpo_loss": 0.4378047287464142, "epoch": 0.9352857817666509, "grad_norm": 26.572088004567764, "learning_rate": 8.689798064925048e-07, "logits": -1.6883081197738647, "logps": -104.9384765625, "loss": 0.4724, "objective": 0.4378047287464142, "ranking_simple": 0.6416666507720947, "regularize": 0.4378047287464142, "step": 330, "wo_beta": 5.635779857635498 }, { "dpo_loss": 0.4327344000339508, "epoch": 0.949456778460085, "grad_norm": 28.535014872524457, "learning_rate": 8.633504448242504e-07, "logits": -1.6694140434265137, "logps": -107.3168716430664, "loss": 0.4595, "objective": 0.4327344000339508, "ranking_simple": 0.6708333492279053, "regularize": 0.4327344000339508, "step": 335, "wo_beta": 5.270318508148193 }, { "dpo_loss": 0.4696439504623413, "epoch": 0.9636277751535192, "grad_norm": 29.414333709201422, "learning_rate": 8.576217467724127e-07, "logits": -1.7229362726211548, "logps": -106.86974334716797, "loss": 0.5044, "objective": 0.4696439504623413, "ranking_simple": 0.637499988079071, "regularize": 0.4696439504623413, "step": 340, "wo_beta": 5.814812660217285 }, { "dpo_loss": 0.43202081322669983, "epoch": 0.9777987718469532, "grad_norm": 21.299611385252437, "learning_rate": 8.517952785058384e-07, "logits": -1.7784336805343628, "logps": -101.36027526855469, "loss": 0.4585, "objective": 0.43202081322669983, "ranking_simple": 0.6958333253860474, "regularize": 0.43202081322669983, "step": 345, "wo_beta": 4.479858875274658 }, { "dpo_loss": 0.4188024699687958, "epoch": 0.9919697685403873, "grad_norm": 20.8027959046757, "learning_rate": 8.458726329227747e-07, "logits": -1.6966959238052368, "logps": -99.99575805664062, "loss": 0.4599, "objective": 0.4188024699687958, "ranking_simple": 0.762499988079071, "regularize": 0.4188024699687958, "step": 350, "wo_beta": 4.140859603881836 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6912401914596558, "eval_logits": -1.8473907709121704, "eval_logps": -104.9832763671875, "eval_loss": 0.6885838508605957, "eval_objective": 0.6912401914596558, "eval_ranking_simple": 0.5351966619491577, "eval_regularize": 0.6912401914596558, "eval_runtime": 375.4224, "eval_samples_per_second": 15.423, "eval_steps_per_second": 1.287, "eval_wo_beta": 8.374881744384766, "step": 350 }, { "dpo_loss": 0.4273616671562195, "epoch": 1.0061407652338215, "grad_norm": 19.801974743070023, "learning_rate": 8.398554292153865e-07, "logits": -1.9038132429122925, "logps": -97.71568298339844, "loss": 0.4198, "objective": 0.4273616671562195, "ranking_simple": 0.6708333492279053, "regularize": 0.4273616671562195, "step": 355, "wo_beta": 4.851585388183594 }, { "dpo_loss": 0.3563633859157562, "epoch": 1.0203117619272555, "grad_norm": 27.347143857547685, "learning_rate": 8.337453124270862e-07, "logits": -1.720730185508728, "logps": -104.91397094726562, "loss": 0.3668, "objective": 0.3563633859157562, "ranking_simple": 0.6958333253860474, "regularize": 0.3563633859157562, "step": 360, "wo_beta": 4.480281352996826 }, { "dpo_loss": 0.34869903326034546, "epoch": 1.0344827586206897, "grad_norm": 21.348824435063978, "learning_rate": 8.275439530027947e-07, "logits": -1.730131983757019, "logps": -111.44975280761719, "loss": 0.3663, "objective": 0.34869903326034546, "ranking_simple": 0.7041666507720947, "regularize": 0.34869903326034546, "step": 365, "wo_beta": 3.829502820968628 }, { "dpo_loss": 0.3821018934249878, "epoch": 1.0486537553141237, "grad_norm": 23.44162059864065, "learning_rate": 8.212530463322582e-07, "logits": -1.7056537866592407, "logps": -104.91122436523438, "loss": 0.3708, "objective": 0.3821018934249878, "ranking_simple": 0.6875, "regularize": 0.3821018934249878, "step": 370, "wo_beta": 5.17144250869751 }, { "dpo_loss": 0.3470642864704132, "epoch": 1.0628247520075578, "grad_norm": 21.682188897692722, "learning_rate": 8.148743122865463e-07, "logits": -1.7548179626464844, "logps": -104.17964172363281, "loss": 0.3665, "objective": 0.3470642864704132, "ranking_simple": 0.6875, "regularize": 0.3470642864704132, "step": 375, "wo_beta": 3.6793243885040283 }, { "dpo_loss": 0.35725000500679016, "epoch": 1.076995748700992, "grad_norm": 22.228388769880265, "learning_rate": 8.084094947478554e-07, "logits": -1.7486780881881714, "logps": -107.26901245117188, "loss": 0.3593, "objective": 0.35725000500679016, "ranking_simple": 0.7083333134651184, "regularize": 0.35725000500679016, "step": 380, "wo_beta": 3.6982295513153076 }, { "dpo_loss": 0.36463692784309387, "epoch": 1.091166745394426, "grad_norm": 21.586757686789323, "learning_rate": 8.018603611327504e-07, "logits": -1.6958861351013184, "logps": -106.55477905273438, "loss": 0.3642, "objective": 0.36463692784309387, "ranking_simple": 0.7041666507720947, "regularize": 0.36463692784309387, "step": 385, "wo_beta": 4.191780090332031 }, { "dpo_loss": 0.32307326793670654, "epoch": 1.10533774208786, "grad_norm": 21.44019149145441, "learning_rate": 7.952287019089685e-07, "logits": -1.6498711109161377, "logps": -109.88943481445312, "loss": 0.3408, "objective": 0.32307326793670654, "ranking_simple": 0.737500011920929, "regularize": 0.32307326793670654, "step": 390, "wo_beta": 4.2303996086120605 }, { "dpo_loss": 0.3521862328052521, "epoch": 1.1195087387812943, "grad_norm": 25.820266993638842, "learning_rate": 7.88516330105925e-07, "logits": -1.726596713066101, "logps": -108.69520568847656, "loss": 0.347, "objective": 0.3521862328052521, "ranking_simple": 0.7291666865348816, "regularize": 0.3521862328052521, "step": 395, "wo_beta": 3.9551658630371094 }, { "dpo_loss": 0.343461811542511, "epoch": 1.1336797354747283, "grad_norm": 22.721697115040456, "learning_rate": 7.817250808190483e-07, "logits": -1.8199702501296997, "logps": -106.22103118896484, "loss": 0.3498, "objective": 0.343461811542511, "ranking_simple": 0.7208333611488342, "regularize": 0.343461811542511, "step": 400, "wo_beta": 3.51658034324646 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.751783549785614, "eval_logits": -1.8806556463241577, "eval_logps": -115.0888671875, "eval_loss": 0.7462870478630066, "eval_objective": 0.751783549785614, "eval_ranking_simple": 0.5517598390579224, "eval_regularize": 0.751783549785614, "eval_runtime": 370.3425, "eval_samples_per_second": 15.634, "eval_steps_per_second": 1.304, "eval_wo_beta": 9.550463676452637, "step": 400 }, { "dpo_loss": 0.3792721927165985, "epoch": 1.1478507321681626, "grad_norm": 32.9676442992408, "learning_rate": 7.74856810708083e-07, "logits": -1.7736860513687134, "logps": -107.8192367553711, "loss": 0.3694, "objective": 0.3792721927165985, "ranking_simple": 0.7208333611488342, "regularize": 0.3792721927165985, "step": 405, "wo_beta": 5.114300727844238 }, { "dpo_loss": 0.3683342933654785, "epoch": 1.1620217288615966, "grad_norm": 21.343924046898124, "learning_rate": 7.679133974894982e-07, "logits": -1.7912460565567017, "logps": -105.35082244873047, "loss": 0.3582, "objective": 0.3683342933654785, "ranking_simple": 0.7166666388511658, "regularize": 0.3683342933654785, "step": 410, "wo_beta": 5.410634517669678 }, { "dpo_loss": 0.3712156116962433, "epoch": 1.1761927255550306, "grad_norm": 20.631633414155974, "learning_rate": 7.608967394231386e-07, "logits": -1.7320811748504639, "logps": -101.5705337524414, "loss": 0.3623, "objective": 0.3712156116962433, "ranking_simple": 0.6958333253860474, "regularize": 0.3712156116962433, "step": 415, "wo_beta": 5.24213171005249 }, { "dpo_loss": 0.34116995334625244, "epoch": 1.1903637222484649, "grad_norm": 21.634750653170887, "learning_rate": 7.538087547932584e-07, "logits": -1.67872953414917, "logps": -101.14533233642578, "loss": 0.359, "objective": 0.34116995334625244, "ranking_simple": 0.6791666746139526, "regularize": 0.34116995334625244, "step": 420, "wo_beta": 4.399470329284668 }, { "dpo_loss": 0.3757858872413635, "epoch": 1.204534718941899, "grad_norm": 21.74672077991223, "learning_rate": 7.466513813840824e-07, "logits": -1.733936071395874, "logps": -99.8553466796875, "loss": 0.3472, "objective": 0.3757858872413635, "ranking_simple": 0.7083333134651184, "regularize": 0.3757858872413635, "step": 425, "wo_beta": 4.1172003746032715 }, { "dpo_loss": 0.3544313311576843, "epoch": 1.2187057156353331, "grad_norm": 25.498669816549643, "learning_rate": 7.394265759500347e-07, "logits": -1.7176556587219238, "logps": -105.41299438476562, "loss": 0.3569, "objective": 0.3544313311576843, "ranking_simple": 0.6958333253860474, "regularize": 0.3544313311576843, "step": 430, "wo_beta": 4.10382604598999 }, { "dpo_loss": 0.35303547978401184, "epoch": 1.2328767123287672, "grad_norm": 24.236679665592966, "learning_rate": 7.321363136807818e-07, "logits": -1.6603659391403198, "logps": -107.16277313232422, "loss": 0.3778, "objective": 0.35303547978401184, "ranking_simple": 0.75, "regularize": 0.35303547978401184, "step": 435, "wo_beta": 3.8118536472320557 }, { "dpo_loss": 0.32170751690864563, "epoch": 1.2470477090222012, "grad_norm": 25.528958844465567, "learning_rate": 7.247825876612352e-07, "logits": -1.6398621797561646, "logps": -105.9729995727539, "loss": 0.3429, "objective": 0.32170751690864563, "ranking_simple": 0.762499988079071, "regularize": 0.32170751690864563, "step": 440, "wo_beta": 3.466229200363159 }, { "dpo_loss": 0.35508811473846436, "epoch": 1.2612187057156352, "grad_norm": 20.894838905133128, "learning_rate": 7.173674083266623e-07, "logits": -1.6645927429199219, "logps": -106.34298706054688, "loss": 0.3613, "objective": 0.35508811473846436, "ranking_simple": 0.737500011920929, "regularize": 0.35508811473846436, "step": 445, "wo_beta": 4.097968101501465 }, { "dpo_loss": 0.32759609818458557, "epoch": 1.2753897024090695, "grad_norm": 21.051410520730908, "learning_rate": 7.098928029130528e-07, "logits": -1.8029848337173462, "logps": -107.71712493896484, "loss": 0.3361, "objective": 0.32759609818458557, "ranking_simple": 0.7583333253860474, "regularize": 0.32759609818458557, "step": 450, "wo_beta": 3.633894443511963 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.7672637701034546, "eval_logits": -1.8356177806854248, "eval_logps": -116.80036926269531, "eval_loss": 0.7562825679779053, "eval_objective": 0.7672637701034546, "eval_ranking_simple": 0.5419254899024963, "eval_regularize": 0.7672637701034546, "eval_runtime": 367.9761, "eval_samples_per_second": 15.735, "eval_steps_per_second": 1.313, "eval_wo_beta": 9.725224494934082, "step": 450 }, { "dpo_loss": 0.3631579279899597, "epoch": 1.2895606991025035, "grad_norm": 27.512782619434457, "learning_rate": 7.023608149028936e-07, "logits": -1.6034198999404907, "logps": -110.81619262695312, "loss": 0.3689, "objective": 0.3631579279899597, "ranking_simple": 0.6708333492279053, "regularize": 0.3631579279899597, "step": 455, "wo_beta": 4.727202892303467 }, { "dpo_loss": 0.3907562792301178, "epoch": 1.3037316957959377, "grad_norm": 24.168580409550643, "learning_rate": 6.947735034665001e-07, "logits": -1.7300904989242554, "logps": -108.0628890991211, "loss": 0.3712, "objective": 0.3907562792301178, "ranking_simple": 0.6583333611488342, "regularize": 0.3907562792301178, "step": 460, "wo_beta": 5.317975997924805 }, { "dpo_loss": 0.3442947268486023, "epoch": 1.3179026924893718, "grad_norm": 22.349252886711223, "learning_rate": 6.871329428990601e-07, "logits": -1.7600762844085693, "logps": -102.89717102050781, "loss": 0.366, "objective": 0.3442947268486023, "ranking_simple": 0.7166666388511658, "regularize": 0.3442947268486023, "step": 465, "wo_beta": 4.05583381652832 }, { "dpo_loss": 0.36818769574165344, "epoch": 1.3320736891828058, "grad_norm": 21.171077905427786, "learning_rate": 6.794412220535425e-07, "logits": -1.8482578992843628, "logps": -100.47290802001953, "loss": 0.3612, "objective": 0.36818769574165344, "ranking_simple": 0.7250000238418579, "regularize": 0.36818769574165344, "step": 470, "wo_beta": 3.844135046005249 }, { "dpo_loss": 0.3290488123893738, "epoch": 1.34624468587624, "grad_norm": 23.17750388094903, "learning_rate": 6.717004437696249e-07, "logits": -1.6789878606796265, "logps": -102.9361572265625, "loss": 0.3486, "objective": 0.3290488123893738, "ranking_simple": 0.7541666626930237, "regularize": 0.3290488123893738, "step": 475, "wo_beta": 3.849888324737549 }, { "dpo_loss": 0.3331288993358612, "epoch": 1.360415682569674, "grad_norm": 31.57784191234236, "learning_rate": 6.639127242987987e-07, "logits": -1.7997510433197021, "logps": -105.81382751464844, "loss": 0.3418, "objective": 0.3331288993358612, "ranking_simple": 0.7124999761581421, "regularize": 0.3331288993358612, "step": 480, "wo_beta": 5.502153396606445 }, { "dpo_loss": 0.3518763482570648, "epoch": 1.3745866792631083, "grad_norm": 25.72021920875409, "learning_rate": 6.560801927258079e-07, "logits": -1.763397455215454, "logps": -105.35774993896484, "loss": 0.3606, "objective": 0.3518763482570648, "ranking_simple": 0.7416666746139526, "regularize": 0.3518763482570648, "step": 485, "wo_beta": 4.792428016662598 }, { "dpo_loss": 0.3617197871208191, "epoch": 1.3887576759565423, "grad_norm": 22.21535306569678, "learning_rate": 6.482049903865768e-07, "logits": -1.7518786191940308, "logps": -109.02607727050781, "loss": 0.3414, "objective": 0.3617197871208191, "ranking_simple": 0.7124999761581421, "regularize": 0.3617197871208191, "step": 490, "wo_beta": 3.807429790496826 }, { "dpo_loss": 0.33107537031173706, "epoch": 1.4029286726499763, "grad_norm": 28.27182569867929, "learning_rate": 6.402892702827916e-07, "logits": -1.7524651288986206, "logps": -110.29097747802734, "loss": 0.3453, "objective": 0.33107537031173706, "ranking_simple": 0.7083333134651184, "regularize": 0.33107537031173706, "step": 495, "wo_beta": 4.099748611450195 }, { "dpo_loss": 0.3612217307090759, "epoch": 1.4170996693434104, "grad_norm": 26.907643867295747, "learning_rate": 6.323351964932908e-07, "logits": -1.6837791204452515, "logps": -109.02066802978516, "loss": 0.3584, "objective": 0.3612217307090759, "ranking_simple": 0.7041666507720947, "regularize": 0.3612217307090759, "step": 500, "wo_beta": 3.91571307182312 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7694771885871887, "eval_logits": -1.8626275062561035, "eval_logps": -117.51673126220703, "eval_loss": 0.7635300159454346, "eval_objective": 0.7694771885871887, "eval_ranking_simple": 0.5419254899024963, "eval_regularize": 0.7694771885871887, "eval_runtime": 370.4437, "eval_samples_per_second": 15.63, "eval_steps_per_second": 1.304, "eval_wo_beta": 9.631916999816895, "step": 500 }, { "dpo_loss": 0.2975477874279022, "epoch": 1.4312706660368446, "grad_norm": 22.51215190081199, "learning_rate": 6.243449435824276e-07, "logits": -1.7515524625778198, "logps": -109.30506134033203, "loss": 0.3308, "objective": 0.2975477874279022, "ranking_simple": 0.75, "regularize": 0.2975477874279022, "step": 505, "wo_beta": 3.399846076965332 }, { "dpo_loss": 0.3239024877548218, "epoch": 1.4454416627302786, "grad_norm": 25.756520666755268, "learning_rate": 6.163206960055652e-07, "logits": -1.7505611181259155, "logps": -108.20401000976562, "loss": 0.3502, "objective": 0.3239024877548218, "ranking_simple": 0.75, "regularize": 0.3239024877548218, "step": 510, "wo_beta": 3.145045042037964 }, { "dpo_loss": 0.3271316587924957, "epoch": 1.4596126594237129, "grad_norm": 22.00239796246049, "learning_rate": 6.082646475118699e-07, "logits": -1.8232632875442505, "logps": -107.82081604003906, "loss": 0.3356, "objective": 0.3271316587924957, "ranking_simple": 0.6708333492279053, "regularize": 0.3271316587924957, "step": 515, "wo_beta": 4.939964771270752 }, { "dpo_loss": 0.30039647221565247, "epoch": 1.473783656117147, "grad_norm": 22.8094809920846, "learning_rate": 6.001790005445606e-07, "logits": -1.6817113161087036, "logps": -106.67537689208984, "loss": 0.3092, "objective": 0.30039647221565247, "ranking_simple": 0.7541666626930237, "regularize": 0.30039647221565247, "step": 520, "wo_beta": 3.6650784015655518 }, { "dpo_loss": 0.34032100439071655, "epoch": 1.487954652810581, "grad_norm": 25.538184332619608, "learning_rate": 5.920659656387836e-07, "logits": -1.5860577821731567, "logps": -107.61659240722656, "loss": 0.3466, "objective": 0.34032100439071655, "ranking_simple": 0.75, "regularize": 0.34032100439071655, "step": 525, "wo_beta": 4.031210422515869 }, { "dpo_loss": 0.3295021951198578, "epoch": 1.5021256495040152, "grad_norm": 22.986849455924027, "learning_rate": 5.839277608172738e-07, "logits": -1.743402123451233, "logps": -111.25679016113281, "loss": 0.3262, "objective": 0.3295021951198578, "ranking_simple": 0.737500011920929, "regularize": 0.3295021951198578, "step": 530, "wo_beta": 3.9925944805145264 }, { "dpo_loss": 0.29601436853408813, "epoch": 1.5162966461974492, "grad_norm": 25.044023904201577, "learning_rate": 5.757666109839702e-07, "logits": -1.7071605920791626, "logps": -112.61083984375, "loss": 0.3088, "objective": 0.29601436853408813, "ranking_simple": 0.7541666626930237, "regularize": 0.29601436853408813, "step": 535, "wo_beta": 3.535855531692505 }, { "dpo_loss": 0.350533127784729, "epoch": 1.5304676428908834, "grad_norm": 22.77917543570306, "learning_rate": 5.675847473157485e-07, "logits": -1.6420516967773438, "logps": -114.60887145996094, "loss": 0.338, "objective": 0.350533127784729, "ranking_simple": 0.7708333134651184, "regularize": 0.350533127784729, "step": 540, "wo_beta": 3.302687644958496 }, { "dpo_loss": 0.31907784938812256, "epoch": 1.5446386395843175, "grad_norm": 27.52281539562519, "learning_rate": 5.5938440665244e-07, "logits": -1.7368324995040894, "logps": -117.18190002441406, "loss": 0.3311, "objective": 0.31907784938812256, "ranking_simple": 0.7875000238418579, "regularize": 0.31907784938812256, "step": 545, "wo_beta": 2.7296268939971924 }, { "dpo_loss": 0.3072589933872223, "epoch": 1.5588096362777515, "grad_norm": 24.90909396316388, "learning_rate": 5.511678308853025e-07, "logits": -1.7334787845611572, "logps": -118.51893615722656, "loss": 0.3343, "objective": 0.3072589933872223, "ranking_simple": 0.7208333611488342, "regularize": 0.3072589933872223, "step": 550, "wo_beta": 3.902778148651123 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7814171314239502, "eval_logits": -1.8209288120269775, "eval_logps": -123.38627624511719, "eval_loss": 0.7698224186897278, "eval_objective": 0.7814171314239502, "eval_ranking_simple": 0.5351966619491577, "eval_regularize": 0.7814171314239502, "eval_runtime": 367.7587, "eval_samples_per_second": 15.744, "eval_steps_per_second": 1.313, "eval_wo_beta": 9.825753211975098, "step": 550 }, { "dpo_loss": 0.34100914001464844, "epoch": 1.5729806329711855, "grad_norm": 24.299530028848974, "learning_rate": 5.429372663441085e-07, "logits": -1.5191646814346313, "logps": -116.5060806274414, "loss": 0.339, "objective": 0.34100914001464844, "ranking_simple": 0.737500011920929, "regularize": 0.34100914001464844, "step": 555, "wo_beta": 3.8065884113311768 }, { "dpo_loss": 0.3122609257698059, "epoch": 1.5871516296646198, "grad_norm": 26.61780255104318, "learning_rate": 5.34694963183022e-07, "logits": -1.5900938510894775, "logps": -112.42914581298828, "loss": 0.334, "objective": 0.3122609257698059, "ranking_simple": 0.7666666507720947, "regularize": 0.3122609257698059, "step": 560, "wo_beta": 4.191233158111572 }, { "dpo_loss": 0.3086826205253601, "epoch": 1.601322626358054, "grad_norm": 23.97171206395212, "learning_rate": 5.264431747654283e-07, "logits": -1.5865463018417358, "logps": -112.55115509033203, "loss": 0.334, "objective": 0.3086826205253601, "ranking_simple": 0.7333333492279053, "regularize": 0.3086826205253601, "step": 565, "wo_beta": 3.760244846343994 }, { "dpo_loss": 0.3280898928642273, "epoch": 1.615493623051488, "grad_norm": 26.790227869518926, "learning_rate": 5.181841570478872e-07, "logits": -1.7293529510498047, "logps": -114.50617980957031, "loss": 0.3065, "objective": 0.3280898928642273, "ranking_simple": 0.7166666388511658, "regularize": 0.3280898928642273, "step": 570, "wo_beta": 4.093240737915039 }, { "dpo_loss": 0.356030136346817, "epoch": 1.629664619744922, "grad_norm": 29.38280925874381, "learning_rate": 5.099201679633768e-07, "logits": -1.7021836042404175, "logps": -112.18016052246094, "loss": 0.3442, "objective": 0.356030136346817, "ranking_simple": 0.7458333373069763, "regularize": 0.356030136346817, "step": 575, "wo_beta": 3.8104941844940186 }, { "dpo_loss": 0.2992390990257263, "epoch": 1.643835616438356, "grad_norm": 24.710654774900764, "learning_rate": 5.016534668039976e-07, "logits": -1.73283052444458, "logps": -111.73848724365234, "loss": 0.3113, "objective": 0.2992390990257263, "ranking_simple": 0.762499988079071, "regularize": 0.2992390990257263, "step": 580, "wo_beta": 2.8948888778686523 }, { "dpo_loss": 0.30101045966148376, "epoch": 1.6580066131317903, "grad_norm": 25.736134751129057, "learning_rate": 4.933863136033039e-07, "logits": -1.5684159994125366, "logps": -111.7412109375, "loss": 0.3245, "objective": 0.30101045966148376, "ranking_simple": 0.7124999761581421, "regularize": 0.30101045966148376, "step": 585, "wo_beta": 3.5469541549682617 }, { "dpo_loss": 0.29352518916130066, "epoch": 1.6721776098252243, "grad_norm": 28.292224202430326, "learning_rate": 4.851209685184338e-07, "logits": -1.6094284057617188, "logps": -114.35250854492188, "loss": 0.3264, "objective": 0.29352518916130066, "ranking_simple": 0.7458333373069763, "regularize": 0.29352518916130066, "step": 590, "wo_beta": 3.287980794906616 }, { "dpo_loss": 0.32477742433547974, "epoch": 1.6863486065186586, "grad_norm": 26.952229320357798, "learning_rate": 4.768596912122045e-07, "logits": -1.5896912813186646, "logps": -114.52155303955078, "loss": 0.3252, "objective": 0.32477742433547974, "ranking_simple": 0.7416666746139526, "regularize": 0.32477742433547974, "step": 595, "wo_beta": 4.611125469207764 }, { "dpo_loss": 0.2708142399787903, "epoch": 1.7005196032120926, "grad_norm": 26.06064163090054, "learning_rate": 4.686047402353433e-07, "logits": -1.6145151853561401, "logps": -114.16989135742188, "loss": 0.3105, "objective": 0.2708142399787903, "ranking_simple": 0.7708333134651184, "regularize": 0.2708142399787903, "step": 600, "wo_beta": 3.207538604736328 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.7761210799217224, "eval_logits": -1.7865931987762451, "eval_logps": -119.82308959960938, "eval_loss": 0.7679409980773926, "eval_objective": 0.7761210799217224, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.7761210799217224, "eval_runtime": 372.6791, "eval_samples_per_second": 15.536, "eval_steps_per_second": 1.296, "eval_wo_beta": 9.803099632263184, "step": 600 }, { "dpo_loss": 0.32439085841178894, "epoch": 1.7146905999055266, "grad_norm": 25.8504927528126, "learning_rate": 4.60358372409022e-07, "logits": -1.602834701538086, "logps": -112.0418472290039, "loss": 0.3292, "objective": 0.32439085841178894, "ranking_simple": 0.7250000238418579, "regularize": 0.32439085841178894, "step": 605, "wo_beta": 4.832021713256836 }, { "dpo_loss": 0.36444205045700073, "epoch": 1.7288615965989607, "grad_norm": 26.17653629180954, "learning_rate": 4.521228422078649e-07, "logits": -1.6547633409500122, "logps": -110.03471374511719, "loss": 0.3239, "objective": 0.36444205045700073, "ranking_simple": 0.75, "regularize": 0.36444205045700073, "step": 610, "wo_beta": 4.15641975402832 }, { "dpo_loss": 0.3479357659816742, "epoch": 1.743032593292395, "grad_norm": 25.45060113638119, "learning_rate": 4.439004011435979e-07, "logits": -1.6919087171554565, "logps": -109.51602172851562, "loss": 0.3279, "objective": 0.3479357659816742, "ranking_simple": 0.7250000238418579, "regularize": 0.3479357659816742, "step": 615, "wo_beta": 4.716867446899414 }, { "dpo_loss": 0.33088982105255127, "epoch": 1.7572035899858292, "grad_norm": 26.887147654271335, "learning_rate": 4.3569329714950703e-07, "logits": -1.7291035652160645, "logps": -109.82331085205078, "loss": 0.3425, "objective": 0.33088982105255127, "ranking_simple": 0.7583333253860474, "regularize": 0.33088982105255127, "step": 620, "wo_beta": 3.5512850284576416 }, { "dpo_loss": 0.3090989291667938, "epoch": 1.7713745866792632, "grad_norm": 26.389383350627952, "learning_rate": 4.275037739658771e-07, "logits": -1.590990424156189, "logps": -110.23678588867188, "loss": 0.3234, "objective": 0.3090989291667938, "ranking_simple": 0.7250000238418579, "regularize": 0.3090989291667938, "step": 625, "wo_beta": 3.666703462600708 }, { "dpo_loss": 0.33155128359794617, "epoch": 1.7855455833726972, "grad_norm": 29.336966725897334, "learning_rate": 4.193340705265745e-07, "logits": -1.635860562324524, "logps": -114.24417114257812, "loss": 0.3204, "objective": 0.33155128359794617, "ranking_simple": 0.7083333134651184, "regularize": 0.33155128359794617, "step": 630, "wo_beta": 4.568333625793457 }, { "dpo_loss": 0.3347407877445221, "epoch": 1.7997165800661312, "grad_norm": 31.313014707447742, "learning_rate": 4.1118642034694565e-07, "logits": -1.7257325649261475, "logps": -116.03430938720703, "loss": 0.3376, "objective": 0.3347407877445221, "ranking_simple": 0.7250000238418579, "regularize": 0.3347407877445221, "step": 635, "wo_beta": 4.352345943450928 }, { "dpo_loss": 0.30713585019111633, "epoch": 1.8138875767595655, "grad_norm": 25.70905071821337, "learning_rate": 4.030630509131959e-07, "logits": -1.6731877326965332, "logps": -116.4244155883789, "loss": 0.3098, "objective": 0.30713585019111633, "ranking_simple": 0.7583333253860474, "regularize": 0.30713585019111633, "step": 640, "wo_beta": 4.2585673332214355 }, { "dpo_loss": 0.29875513911247253, "epoch": 1.8280585734529995, "grad_norm": 23.791400324977506, "learning_rate": 3.9496618307341713e-07, "logits": -1.7462607622146606, "logps": -116.83419036865234, "loss": 0.3005, "objective": 0.29875513911247253, "ranking_simple": 0.7791666388511658, "regularize": 0.29875513911247253, "step": 645, "wo_beta": 3.3428430557250977 }, { "dpo_loss": 0.32320088148117065, "epoch": 1.8422295701464337, "grad_norm": 32.03839388955998, "learning_rate": 3.8689803043042996e-07, "logits": -1.8062669038772583, "logps": -117.27250671386719, "loss": 0.3412, "objective": 0.32320088148117065, "ranking_simple": 0.7583333253860474, "regularize": 0.32320088148117065, "step": 650, "wo_beta": 3.019827127456665 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.7847943305969238, "eval_logits": -1.832274079322815, "eval_logps": -122.29435729980469, "eval_loss": 0.7749964594841003, "eval_objective": 0.7847943305969238, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.7847943305969238, "eval_runtime": 369.3337, "eval_samples_per_second": 15.677, "eval_steps_per_second": 1.308, "eval_wo_beta": 9.94935417175293, "step": 650 }, { "dpo_loss": 0.3198917508125305, "epoch": 1.8564005668398678, "grad_norm": 24.453465336538663, "learning_rate": 3.788607987366069e-07, "logits": -1.6524808406829834, "logps": -113.18647766113281, "loss": 0.333, "objective": 0.3198917508125305, "ranking_simple": 0.737500011920929, "regularize": 0.3198917508125305, "step": 655, "wo_beta": 3.779681444168091 }, { "dpo_loss": 0.30827051401138306, "epoch": 1.8705715635333018, "grad_norm": 25.30249944675523, "learning_rate": 3.708566852908418e-07, "logits": -1.722990870475769, "logps": -117.13253021240234, "loss": 0.3093, "objective": 0.30827051401138306, "ranking_simple": 0.7250000238418579, "regularize": 0.30827051401138306, "step": 660, "wo_beta": 3.8630990982055664 }, { "dpo_loss": 0.30362004041671753, "epoch": 1.8847425602267358, "grad_norm": 26.577906210584395, "learning_rate": 3.6288787833783016e-07, "logits": -1.6925681829452515, "logps": -114.92183685302734, "loss": 0.3287, "objective": 0.30362004041671753, "ranking_simple": 0.7749999761581421, "regularize": 0.30362004041671753, "step": 665, "wo_beta": 3.2407257556915283 }, { "dpo_loss": 0.33854812383651733, "epoch": 1.89891355692017, "grad_norm": 26.608491612605135, "learning_rate": 3.5495655646982503e-07, "logits": -1.632662057876587, "logps": -113.74718475341797, "loss": 0.3206, "objective": 0.33854812383651733, "ranking_simple": 0.7124999761581421, "regularize": 0.33854812383651733, "step": 670, "wo_beta": 4.751885890960693 }, { "dpo_loss": 0.30450791120529175, "epoch": 1.9130845536136043, "grad_norm": 27.707963800825, "learning_rate": 3.470648880310313e-07, "logits": -1.7001540660858154, "logps": -112.93498992919922, "loss": 0.3286, "objective": 0.30450791120529175, "ranking_simple": 0.800000011920929, "regularize": 0.30450791120529175, "step": 675, "wo_beta": 2.8013789653778076 }, { "dpo_loss": 0.29233846068382263, "epoch": 1.9272555503070383, "grad_norm": 26.318074274927174, "learning_rate": 3.3921503052480236e-07, "logits": -1.7435904741287231, "logps": -115.07746887207031, "loss": 0.3108, "objective": 0.29233846068382263, "ranking_simple": 0.7833333611488342, "regularize": 0.29233846068382263, "step": 680, "wo_beta": 3.6125741004943848 }, { "dpo_loss": 0.2992376685142517, "epoch": 1.9414265470004723, "grad_norm": 26.980724381093548, "learning_rate": 3.314091300237999e-07, "logits": -1.6790062189102173, "logps": -115.0765380859375, "loss": 0.2818, "objective": 0.2992376685142517, "ranking_simple": 0.7333333492279053, "regularize": 0.2992376685142517, "step": 685, "wo_beta": 4.020833969116211 }, { "dpo_loss": 0.2989169955253601, "epoch": 1.9555975436939064, "grad_norm": 28.956498459281256, "learning_rate": 3.236493205832794e-07, "logits": -1.7138378620147705, "logps": -116.5325927734375, "loss": 0.3112, "objective": 0.2989169955253601, "ranking_simple": 0.762499988079071, "regularize": 0.2989169955253601, "step": 690, "wo_beta": 3.235595941543579 }, { "dpo_loss": 0.3340277373790741, "epoch": 1.9697685403873406, "grad_norm": 29.008978725819244, "learning_rate": 3.15937723657661e-07, "logits": -1.5779744386672974, "logps": -117.33056640625, "loss": 0.3178, "objective": 0.3340277373790741, "ranking_simple": 0.7416666746139526, "regularize": 0.3340277373790741, "step": 695, "wo_beta": 4.477265357971191 }, { "dpo_loss": 0.3072899878025055, "epoch": 1.9839395370807746, "grad_norm": 26.91852361479357, "learning_rate": 3.082764475205442e-07, "logits": -1.5716139078140259, "logps": -120.74311065673828, "loss": 0.3156, "objective": 0.3072899878025055, "ranking_simple": 0.7583333253860474, "regularize": 0.3072899878025055, "step": 700, "wo_beta": 3.845552682876587 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.813927948474884, "eval_logits": -1.8338414430618286, "eval_logps": -126.39391326904297, "eval_loss": 0.8013313412666321, "eval_objective": 0.813927948474884, "eval_ranking_simple": 0.5377846956253052, "eval_regularize": 0.813927948474884, "eval_runtime": 375.3584, "eval_samples_per_second": 15.425, "eval_steps_per_second": 1.287, "eval_wo_beta": 10.32465934753418, "step": 700 }, { "dpo_loss": 0.3367713391780853, "epoch": 1.9981105337742089, "grad_norm": 30.634677985646626, "learning_rate": 3.006675866883275e-07, "logits": -1.486984133720398, "logps": -120.58149719238281, "loss": 0.3202, "objective": 0.3367713391780853, "ranking_simple": 0.699999988079071, "regularize": 0.3367713391780853, "step": 705, "wo_beta": 5.604025840759277 }, { "dpo_loss": 0.26506486535072327, "epoch": 2.012281530467643, "grad_norm": 21.811542358240928, "learning_rate": 2.931132213475884e-07, "logits": -1.652250051498413, "logps": -117.85489654541016, "loss": 0.247, "objective": 0.26506486535072327, "ranking_simple": 0.7958333492279053, "regularize": 0.26506486535072327, "step": 710, "wo_beta": 2.941725015640259 }, { "dpo_loss": 0.21083328127861023, "epoch": 2.026452527161077, "grad_norm": 22.009413556660768, "learning_rate": 2.856154167863814e-07, "logits": -1.7095146179199219, "logps": -118.10084533691406, "loss": 0.2213, "objective": 0.21083328127861023, "ranking_simple": 0.8166666626930237, "regularize": 0.21083328127861023, "step": 715, "wo_beta": 1.996678113937378 }, { "dpo_loss": 0.1858675181865692, "epoch": 2.040623523854511, "grad_norm": 19.31295851177097, "learning_rate": 2.7817622282960813e-07, "logits": -1.6148954629898071, "logps": -123.07051086425781, "loss": 0.2017, "objective": 0.1858675181865692, "ranking_simple": 0.824999988079071, "regularize": 0.1858675181865692, "step": 720, "wo_beta": 2.2542519569396973 }, { "dpo_loss": 0.2097151279449463, "epoch": 2.0547945205479454, "grad_norm": 24.65512776088815, "learning_rate": 2.707976732786166e-07, "logits": -1.686496376991272, "logps": -126.76118469238281, "loss": 0.2047, "objective": 0.2097151279449463, "ranking_simple": 0.8083333373069763, "regularize": 0.2097151279449463, "step": 725, "wo_beta": 2.5733823776245117 }, { "dpo_loss": 0.23622439801692963, "epoch": 2.0689655172413794, "grad_norm": 27.027861051451126, "learning_rate": 2.6348178535517965e-07, "logits": -1.7227706909179688, "logps": -126.69216918945312, "loss": 0.2175, "objective": 0.23622439801692963, "ranking_simple": 0.7749999761581421, "regularize": 0.23622439801692963, "step": 730, "wo_beta": 3.341784715652466 }, { "dpo_loss": 0.23194490373134613, "epoch": 2.0831365139348135, "grad_norm": 29.247659239837198, "learning_rate": 2.5623055915000686e-07, "logits": -1.6128872632980347, "logps": -126.96920776367188, "loss": 0.2248, "objective": 0.23194490373134613, "ranking_simple": 0.7875000238418579, "regularize": 0.23194490373134613, "step": 735, "wo_beta": 3.5192384719848633 }, { "dpo_loss": 0.2231958657503128, "epoch": 2.0973075106282475, "grad_norm": 25.19144911793584, "learning_rate": 2.490459770759398e-07, "logits": -1.6792032718658447, "logps": -123.05906677246094, "loss": 0.2328, "objective": 0.2231958657503128, "ranking_simple": 0.8125, "regularize": 0.2231958657503128, "step": 740, "wo_beta": 2.758615016937256 }, { "dpo_loss": 0.20981092751026154, "epoch": 2.1114785073216815, "grad_norm": 23.2038862143533, "learning_rate": 2.419300033259798e-07, "logits": -1.6360180377960205, "logps": -124.81270599365234, "loss": 0.2182, "objective": 0.20981092751026154, "ranking_simple": 0.7791666388511658, "regularize": 0.20981092751026154, "step": 745, "wo_beta": 3.1798312664031982 }, { "dpo_loss": 0.22788210213184357, "epoch": 2.1256495040151155, "grad_norm": 22.72550221909295, "learning_rate": 2.3488458333629773e-07, "logits": -1.6554747819900513, "logps": -122.24414825439453, "loss": 0.2183, "objective": 0.22788210213184357, "ranking_simple": 0.8041666746139526, "regularize": 0.22788210213184357, "step": 750, "wo_beta": 2.6969196796417236 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.8603518009185791, "eval_logits": -1.7998822927474976, "eval_logps": -131.12570190429688, "eval_loss": 0.8466524481773376, "eval_objective": 0.8603518009185791, "eval_ranking_simple": 0.5351966619491577, "eval_regularize": 0.8603518009185791, "eval_runtime": 370.8383, "eval_samples_per_second": 15.613, "eval_steps_per_second": 1.302, "eval_wo_beta": 10.893115997314453, "step": 750 }, { "dpo_loss": 0.2121828943490982, "epoch": 2.13982050070855, "grad_norm": 20.710190502520984, "learning_rate": 2.2791164325437046e-07, "logits": -1.6399970054626465, "logps": -126.25770568847656, "loss": 0.2128, "objective": 0.2121828943490982, "ranking_simple": 0.8125, "regularize": 0.2121828943490982, "step": 755, "wo_beta": 3.0046989917755127 }, { "dpo_loss": 0.24169200658798218, "epoch": 2.153991497401984, "grad_norm": 22.67213482006544, "learning_rate": 2.21013089412392e-07, "logits": -1.6009403467178345, "logps": -119.49600219726562, "loss": 0.2344, "objective": 0.24169200658798218, "ranking_simple": 0.7916666865348816, "regularize": 0.24169200658798218, "step": 760, "wo_beta": 2.758802890777588 }, { "dpo_loss": 0.2243640273809433, "epoch": 2.168162494095418, "grad_norm": 20.64816775047569, "learning_rate": 2.1419080780610122e-07, "logits": -1.6553146839141846, "logps": -120.87618255615234, "loss": 0.2361, "objective": 0.2243640273809433, "ranking_simple": 0.8208333253860474, "regularize": 0.2243640273809433, "step": 765, "wo_beta": 2.57806396484375 }, { "dpo_loss": 0.2182272970676422, "epoch": 2.182333490788852, "grad_norm": 24.316819536544447, "learning_rate": 2.0744666357916925e-07, "logits": -1.6933544874191284, "logps": -120.17996978759766, "loss": 0.2274, "objective": 0.2182272970676422, "ranking_simple": 0.7916666865348816, "regularize": 0.2182272970676422, "step": 770, "wo_beta": 2.544562816619873 }, { "dpo_loss": 0.20803479850292206, "epoch": 2.196504487482286, "grad_norm": 23.36541458579555, "learning_rate": 2.0078250051328782e-07, "logits": -1.6226826906204224, "logps": -122.02224731445312, "loss": 0.2183, "objective": 0.20803479850292206, "ranking_simple": 0.8208333253860474, "regularize": 0.20803479850292206, "step": 775, "wo_beta": 2.7029988765716553 }, { "dpo_loss": 0.22727744281291962, "epoch": 2.21067548417572, "grad_norm": 24.538113581222706, "learning_rate": 1.942001405240979e-07, "logits": -1.5558067560195923, "logps": -124.58464813232422, "loss": 0.2171, "objective": 0.22727744281291962, "ranking_simple": 0.7541666626930237, "regularize": 0.22727744281291962, "step": 780, "wo_beta": 3.0669195652008057 }, { "dpo_loss": 0.21583274006843567, "epoch": 2.2248464808691546, "grad_norm": 28.544460286246334, "learning_rate": 1.877013831630961e-07, "logits": -1.5640733242034912, "logps": -123.09439849853516, "loss": 0.2152, "objective": 0.21583274006843567, "ranking_simple": 0.7958333492279053, "regularize": 0.21583274006843567, "step": 785, "wo_beta": 3.0127415657043457 }, { "dpo_loss": 0.1936068832874298, "epoch": 2.2390174775625886, "grad_norm": 23.5808144420256, "learning_rate": 1.812880051256551e-07, "logits": -1.5408331155776978, "logps": -125.1629638671875, "loss": 0.2122, "objective": 0.1936068832874298, "ranking_simple": 0.8583333492279053, "regularize": 0.1936068832874298, "step": 790, "wo_beta": 2.2015647888183594 }, { "dpo_loss": 0.21609367430210114, "epoch": 2.2531884742560226, "grad_norm": 23.539955858336363, "learning_rate": 1.7496175976529337e-07, "logits": -1.6351306438446045, "logps": -125.9599609375, "loss": 0.2139, "objective": 0.21609367430210114, "ranking_simple": 0.8374999761581421, "regularize": 0.21609367430210114, "step": 795, "wo_beta": 2.052849292755127 }, { "dpo_loss": 0.2304428219795227, "epoch": 2.2673594709494567, "grad_norm": 30.254441621642492, "learning_rate": 1.6872437661432516e-07, "logits": -1.6573865413665771, "logps": -127.17088317871094, "loss": 0.2338, "objective": 0.2304428219795227, "ranking_simple": 0.8291666507720947, "regularize": 0.2304428219795227, "step": 800, "wo_beta": 2.813727617263794 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.864130973815918, "eval_logits": -1.8069664239883423, "eval_logps": -132.1160125732422, "eval_loss": 0.8479817509651184, "eval_objective": 0.864130973815918, "eval_ranking_simple": 0.5351966619491577, "eval_regularize": 0.864130973815918, "eval_runtime": 369.7595, "eval_samples_per_second": 15.659, "eval_steps_per_second": 1.306, "eval_wo_beta": 10.98104476928711, "step": 800 }, { "dpo_loss": 0.24076558649539948, "epoch": 2.2815304676428907, "grad_norm": 22.143907441363098, "learning_rate": 1.62577560911024e-07, "logits": -1.645892858505249, "logps": -124.22594451904297, "loss": 0.2325, "objective": 0.24076558649539948, "ranking_simple": 0.762499988079071, "regularize": 0.24076558649539948, "step": 805, "wo_beta": 3.4595048427581787 }, { "dpo_loss": 0.2532159686088562, "epoch": 2.295701464336325, "grad_norm": 22.775287720493225, "learning_rate": 1.565229931334277e-07, "logits": -1.7806832790374756, "logps": -121.71713256835938, "loss": 0.229, "objective": 0.2532159686088562, "ranking_simple": 0.8041666746139526, "regularize": 0.2532159686088562, "step": 810, "wo_beta": 2.6662535667419434 }, { "dpo_loss": 0.21587124466896057, "epoch": 2.309872461029759, "grad_norm": 20.40928105754396, "learning_rate": 1.5056232853991208e-07, "logits": -1.6817148923873901, "logps": -123.68048858642578, "loss": 0.1989, "objective": 0.21587124466896057, "ranking_simple": 0.800000011920929, "regularize": 0.21587124466896057, "step": 815, "wo_beta": 2.549884557723999 }, { "dpo_loss": 0.21313165128231049, "epoch": 2.324043457723193, "grad_norm": 24.789515132235113, "learning_rate": 1.4469719671666043e-07, "logits": -1.6495434045791626, "logps": -124.32107543945312, "loss": 0.2149, "objective": 0.21313165128231049, "ranking_simple": 0.8083333373069763, "regularize": 0.21313165128231049, "step": 820, "wo_beta": 2.404456377029419 }, { "dpo_loss": 0.21146027743816376, "epoch": 2.3382144544166272, "grad_norm": 21.660632425555523, "learning_rate": 1.389292011321498e-07, "logits": -1.692821979522705, "logps": -122.83853149414062, "loss": 0.2168, "objective": 0.21146027743816376, "ranking_simple": 0.8166666626930237, "regularize": 0.21146027743816376, "step": 825, "wo_beta": 2.4975247383117676 }, { "dpo_loss": 0.19700536131858826, "epoch": 2.3523854511100613, "grad_norm": 27.701358075237533, "learning_rate": 1.3325991869878012e-07, "logits": -1.6795495748519897, "logps": -126.65140533447266, "loss": 0.1967, "objective": 0.19700536131858826, "ranking_simple": 0.8374999761581421, "regularize": 0.19700536131858826, "step": 830, "wo_beta": 2.146075487136841 }, { "dpo_loss": 0.21794618666172028, "epoch": 2.3665564478034957, "grad_norm": 29.238202346662906, "learning_rate": 1.2769089934176126e-07, "logits": -1.647661566734314, "logps": -125.17839813232422, "loss": 0.2288, "objective": 0.21794618666172028, "ranking_simple": 0.8416666388511658, "regularize": 0.21794618666172028, "step": 835, "wo_beta": 2.8974695205688477 }, { "dpo_loss": 0.21900226175785065, "epoch": 2.3807274444969297, "grad_norm": 24.009821339334245, "learning_rate": 1.222236655753791e-07, "logits": -1.6093765497207642, "logps": -125.18759155273438, "loss": 0.2184, "objective": 0.21900226175785065, "ranking_simple": 0.7833333611488342, "regularize": 0.21900226175785065, "step": 840, "wo_beta": 2.749152898788452 }, { "dpo_loss": 0.24857226014137268, "epoch": 2.3948984411903638, "grad_norm": 27.5079137044431, "learning_rate": 1.1685971208675538e-07, "logits": -1.6249310970306396, "logps": -127.47116088867188, "loss": 0.2262, "objective": 0.24857226014137268, "ranking_simple": 0.7916666865348816, "regularize": 0.24857226014137268, "step": 845, "wo_beta": 2.7568535804748535 }, { "dpo_loss": 0.2085387259721756, "epoch": 2.409069437883798, "grad_norm": 25.878018469347772, "learning_rate": 1.1160050532721527e-07, "logits": -1.67723548412323, "logps": -125.76563262939453, "loss": 0.2015, "objective": 0.2085387259721756, "ranking_simple": 0.7958333492279053, "regularize": 0.2085387259721756, "step": 850, "wo_beta": 2.600330352783203 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.8720477223396301, "eval_logits": -1.80176842212677, "eval_logps": -133.381103515625, "eval_loss": 0.8571510910987854, "eval_objective": 0.8720477223396301, "eval_ranking_simple": 0.5377846956253052, "eval_regularize": 0.8720477223396301, "eval_runtime": 369.6782, "eval_samples_per_second": 15.662, "eval_steps_per_second": 1.307, "eval_wo_beta": 11.025221824645996, "step": 850 }, { "dpo_loss": 0.2096593677997589, "epoch": 2.423240434577232, "grad_norm": 25.499456648461173, "learning_rate": 1.0644748311137375e-07, "logits": -1.6905667781829834, "logps": -123.53242492675781, "loss": 0.2195, "objective": 0.2096593677997589, "ranking_simple": 0.8458333611488342, "regularize": 0.2096593677997589, "step": 855, "wo_beta": 1.9907649755477905 }, { "dpo_loss": 0.21979431807994843, "epoch": 2.4374114312706663, "grad_norm": 24.848581285581997, "learning_rate": 1.0140205422405212e-07, "logits": -1.6141736507415771, "logps": -127.53914642333984, "loss": 0.2157, "objective": 0.21979431807994843, "ranking_simple": 0.7916666865348816, "regularize": 0.21979431807994843, "step": 860, "wo_beta": 3.0604774951934814 }, { "dpo_loss": 0.2267284095287323, "epoch": 2.4515824279641003, "grad_norm": 26.5384738263413, "learning_rate": 9.646559803512993e-08, "logits": -1.6312936544418335, "logps": -125.63304138183594, "loss": 0.2262, "objective": 0.2267284095287323, "ranking_simple": 0.7708333134651184, "regularize": 0.2267284095287323, "step": 865, "wo_beta": 3.2560784816741943 }, { "dpo_loss": 0.196714848279953, "epoch": 2.4657534246575343, "grad_norm": 22.187392253095393, "learning_rate": 9.163946412243895e-08, "logits": -1.6977574825286865, "logps": -125.96756744384766, "loss": 0.2037, "objective": 0.196714848279953, "ranking_simple": 0.824999988079071, "regularize": 0.196714848279953, "step": 870, "wo_beta": 2.583970308303833 }, { "dpo_loss": 0.1924598515033722, "epoch": 2.4799244213509684, "grad_norm": 25.12579870814084, "learning_rate": 8.692497190280224e-08, "logits": -1.5863568782806396, "logps": -126.04496765136719, "loss": 0.2038, "objective": 0.1924598515033722, "ranking_simple": 0.8583333492279053, "regularize": 0.1924598515033722, "step": 875, "wo_beta": 2.296090602874756 }, { "dpo_loss": 0.2359813004732132, "epoch": 2.4940954180444024, "grad_norm": 23.176269104907163, "learning_rate": 8.232341027131883e-08, "logits": -1.5722264051437378, "logps": -125.54722595214844, "loss": 0.2224, "objective": 0.2359813004732132, "ranking_simple": 0.7666666507720947, "regularize": 0.2359813004732132, "step": 880, "wo_beta": 3.9758193492889404 }, { "dpo_loss": 0.2542850375175476, "epoch": 2.5082664147378364, "grad_norm": 28.503952001031568, "learning_rate": 7.783603724899257e-08, "logits": -1.724973201751709, "logps": -123.99037170410156, "loss": 0.2326, "objective": 0.2542850375175476, "ranking_simple": 0.7958333492279053, "regularize": 0.2542850375175476, "step": 885, "wo_beta": 3.509568929672241 }, { "dpo_loss": 0.20098893344402313, "epoch": 2.5224374114312704, "grad_norm": 24.488143250342084, "learning_rate": 7.346407963880136e-08, "logits": -1.595066785812378, "logps": -123.90083312988281, "loss": 0.2056, "objective": 0.20098893344402313, "ranking_simple": 0.8291666507720947, "regularize": 0.20098893344402313, "step": 890, "wo_beta": 2.0867483615875244 }, { "dpo_loss": 0.22675950825214386, "epoch": 2.536608408124705, "grad_norm": 26.1383864365769, "learning_rate": 6.92087326903022e-08, "logits": -1.6463298797607422, "logps": -126.22193908691406, "loss": 0.2235, "objective": 0.22675950825214386, "ranking_simple": 0.7833333611488342, "regularize": 0.22675950825214386, "step": 895, "wo_beta": 3.0455052852630615 }, { "dpo_loss": 0.26797404885292053, "epoch": 2.550779404818139, "grad_norm": 29.177118299023412, "learning_rate": 6.507115977286143e-08, "logits": -1.5577763319015503, "logps": -123.61392974853516, "loss": 0.2348, "objective": 0.26797404885292053, "ranking_simple": 0.7749999761581421, "regularize": 0.26797404885292053, "step": 900, "wo_beta": 3.1443541049957275 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.8675441145896912, "eval_logits": -1.8113691806793213, "eval_logps": -133.67962646484375, "eval_loss": 0.8529655933380127, "eval_objective": 0.8675441145896912, "eval_ranking_simple": 0.5377846956253052, "eval_regularize": 0.8675441145896912, "eval_runtime": 369.0633, "eval_samples_per_second": 15.688, "eval_steps_per_second": 1.309, "eval_wo_beta": 10.942305564880371, "step": 900 }, { "dpo_loss": 0.20284312963485718, "epoch": 2.564950401511573, "grad_norm": 19.950697729864736, "learning_rate": 6.105249205760127e-08, "logits": -1.6392539739608765, "logps": -123.07585906982422, "loss": 0.2053, "objective": 0.20284312963485718, "ranking_simple": 0.8333333134651184, "regularize": 0.20284312963485718, "step": 905, "wo_beta": 2.37481951713562 }, { "dpo_loss": 0.23266027867794037, "epoch": 2.579121398205007, "grad_norm": 30.055735152469072, "learning_rate": 5.7153828208148846e-08, "logits": -1.6491973400115967, "logps": -127.52837371826172, "loss": 0.217, "objective": 0.23266027867794037, "ranking_simple": 0.800000011920929, "regularize": 0.23266027867794037, "step": 910, "wo_beta": 3.36558198928833 }, { "dpo_loss": 0.23302534222602844, "epoch": 2.593292394898441, "grad_norm": 22.890822851586893, "learning_rate": 5.337623408027292e-08, "logits": -1.7552512884140015, "logps": -127.87796783447266, "loss": 0.2121, "objective": 0.23302534222602844, "ranking_simple": 0.8208333253860474, "regularize": 0.23302534222602844, "step": 915, "wo_beta": 2.502667188644409 }, { "dpo_loss": 0.20162495970726013, "epoch": 2.6074633915918755, "grad_norm": 24.20273693189473, "learning_rate": 4.972074243048896e-08, "logits": -1.5814868211746216, "logps": -127.01163482666016, "loss": 0.2068, "objective": 0.20162495970726013, "ranking_simple": 0.8333333134651184, "regularize": 0.20162495970726013, "step": 920, "wo_beta": 1.8146071434020996 }, { "dpo_loss": 0.21291884779930115, "epoch": 2.6216343882853095, "grad_norm": 22.388349767286606, "learning_rate": 4.6188352633713956e-08, "logits": -1.5830769538879395, "logps": -126.23300170898438, "loss": 0.2195, "objective": 0.21291884779930115, "ranking_simple": 0.800000011920929, "regularize": 0.21291884779930115, "step": 925, "wo_beta": 3.1612021923065186 }, { "dpo_loss": 0.24175626039505005, "epoch": 2.6358053849787435, "grad_norm": 21.949168917598172, "learning_rate": 4.2780030410047796e-08, "logits": -1.6357704401016235, "logps": -125.97001647949219, "loss": 0.2179, "objective": 0.24175626039505005, "ranking_simple": 0.7708333134651184, "regularize": 0.24175626039505005, "step": 930, "wo_beta": 3.436295747756958 }, { "dpo_loss": 0.22544310986995697, "epoch": 2.6499763816721775, "grad_norm": 28.54777226492267, "learning_rate": 3.949670756075446e-08, "logits": -1.6567264795303345, "logps": -123.90028381347656, "loss": 0.2428, "objective": 0.22544310986995697, "ranking_simple": 0.8125, "regularize": 0.22544310986995697, "step": 935, "wo_beta": 2.002594470977783 }, { "dpo_loss": 0.19537684321403503, "epoch": 2.6641473783656116, "grad_norm": 27.292491316004774, "learning_rate": 3.63392817135173e-08, "logits": -1.6852660179138184, "logps": -128.0804901123047, "loss": 0.2163, "objective": 0.19537684321403503, "ranking_simple": 0.8083333373069763, "regularize": 0.19537684321403503, "step": 940, "wo_beta": 3.2341361045837402 }, { "dpo_loss": 0.21889939904212952, "epoch": 2.678318375059046, "grad_norm": 23.60505865361295, "learning_rate": 3.330861607703611e-08, "logits": -1.7046397924423218, "logps": -125.53500366210938, "loss": 0.206, "objective": 0.21889939904212952, "ranking_simple": 0.7875000238418579, "regularize": 0.21889939904212952, "step": 945, "wo_beta": 3.0100159645080566 }, { "dpo_loss": 0.20888622105121613, "epoch": 2.69248937175248, "grad_norm": 24.640589732429273, "learning_rate": 3.040553920503502e-08, "logits": -1.5867191553115845, "logps": -123.99597930908203, "loss": 0.2268, "objective": 0.20888622105121613, "ranking_simple": 0.8208333253860474, "regularize": 0.20888622105121613, "step": 950, "wo_beta": 2.38991379737854 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.8684250116348267, "eval_logits": -1.8135783672332764, "eval_logps": -133.2828826904297, "eval_loss": 0.8525474667549133, "eval_objective": 0.8684250116348267, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.8684250116348267, "eval_runtime": 373.8008, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.292, "eval_wo_beta": 10.9784517288208, "step": 950 }, { "dpo_loss": 0.21907640993595123, "epoch": 2.706660368445914, "grad_norm": 25.73877923734523, "learning_rate": 2.7630844769743756e-08, "logits": -1.696962833404541, "logps": -125.86216735839844, "loss": 0.217, "objective": 0.21907640993595123, "ranking_simple": 0.8333333134651184, "regularize": 0.21907640993595123, "step": 955, "wo_beta": 2.471001148223877 }, { "dpo_loss": 0.24159465730190277, "epoch": 2.720831365139348, "grad_norm": 27.398682783569342, "learning_rate": 2.4985291344915673e-08, "logits": -1.6559653282165527, "logps": -125.80078125, "loss": 0.2243, "objective": 0.24159465730190277, "ranking_simple": 0.8041666746139526, "regularize": 0.24159465730190277, "step": 960, "wo_beta": 3.4911081790924072 }, { "dpo_loss": 0.17974473536014557, "epoch": 2.735002361832782, "grad_norm": 26.129978647933363, "learning_rate": 2.2469602198441573e-08, "logits": -1.6600605249404907, "logps": -125.82157897949219, "loss": 0.2216, "objective": 0.17974473536014557, "ranking_simple": 0.8458333611488342, "regularize": 0.17974473536014557, "step": 965, "wo_beta": 1.8301441669464111 }, { "dpo_loss": 0.2389921396970749, "epoch": 2.7491733585262166, "grad_norm": 24.93627247502558, "learning_rate": 2.008446509461498e-08, "logits": -1.6814639568328857, "logps": -127.4894027709961, "loss": 0.2119, "objective": 0.2389921396970749, "ranking_simple": 0.8333333134651184, "regularize": 0.2389921396970749, "step": 970, "wo_beta": 2.071624517440796 }, { "dpo_loss": 0.21195697784423828, "epoch": 2.7633443552196506, "grad_norm": 24.386661793540505, "learning_rate": 1.7830532106104746e-08, "logits": -1.6035431623458862, "logps": -124.46774291992188, "loss": 0.2154, "objective": 0.21195697784423828, "ranking_simple": 0.8333333134651184, "regularize": 0.21195697784423828, "step": 975, "wo_beta": 3.027677059173584 }, { "dpo_loss": 0.2177601158618927, "epoch": 2.7775153519130846, "grad_norm": 30.722672013551914, "learning_rate": 1.570841943568446e-08, "logits": -1.7569483518600464, "logps": -126.10541534423828, "loss": 0.2135, "objective": 0.2177601158618927, "ranking_simple": 0.824999988079071, "regularize": 0.2177601158618927, "step": 980, "wo_beta": 1.981053113937378 }, { "dpo_loss": 0.21660226583480835, "epoch": 2.7916863486065187, "grad_norm": 24.268020278235447, "learning_rate": 1.3718707247769134e-08, "logits": -1.5750867128372192, "logps": -122.41696166992188, "loss": 0.2142, "objective": 0.21660226583480835, "ranking_simple": 0.8208333253860474, "regularize": 0.21660226583480835, "step": 985, "wo_beta": 2.762291431427002 }, { "dpo_loss": 0.23230423033237457, "epoch": 2.8058573452999527, "grad_norm": 25.3957119119687, "learning_rate": 1.1861939509803686e-08, "logits": -1.626227617263794, "logps": -125.03184509277344, "loss": 0.2309, "objective": 0.23230423033237457, "ranking_simple": 0.7916666865348816, "regularize": 0.23230423033237457, "step": 990, "wo_beta": 3.499976396560669 }, { "dpo_loss": 0.22918492555618286, "epoch": 2.820028341993387, "grad_norm": 26.589296858875272, "learning_rate": 1.0138623843548078e-08, "logits": -1.691062092781067, "logps": -126.53192901611328, "loss": 0.2269, "objective": 0.22918492555618286, "ranking_simple": 0.7916666865348816, "regularize": 0.22918492555618286, "step": 995, "wo_beta": 3.2248499393463135 }, { "dpo_loss": 0.22611786425113678, "epoch": 2.8341993386868207, "grad_norm": 23.891964045353923, "learning_rate": 8.54923138629815e-09, "logits": -1.6503469944000244, "logps": -122.57675170898438, "loss": 0.2198, "objective": 0.22611786425113678, "ranking_simple": 0.8333333134651184, "regularize": 0.22611786425113678, "step": 1000, "wo_beta": 2.427152395248413 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.8652148246765137, "eval_logits": -1.8167296648025513, "eval_logps": -132.88087463378906, "eval_loss": 0.8492961525917053, "eval_objective": 0.8652148246765137, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 0.8652148246765137, "eval_runtime": 368.2335, "eval_samples_per_second": 15.724, "eval_steps_per_second": 1.312, "eval_wo_beta": 10.938254356384277, "step": 1000 }, { "dpo_loss": 0.24608786404132843, "epoch": 2.848370335380255, "grad_norm": 27.602968631540815, "learning_rate": 7.09419666208183e-09, "logits": -1.6333565711975098, "logps": -123.5007553100586, "loss": 0.2273, "objective": 0.24608786404132843, "ranking_simple": 0.7916666865348816, "regularize": 0.24608786404132843, "step": 1005, "wo_beta": 3.1694223880767822 }, { "dpo_loss": 0.20009997487068176, "epoch": 2.862541332073689, "grad_norm": 25.090299269603783, "learning_rate": 5.773917462864264e-09, "logits": -1.6704093217849731, "logps": -125.71635437011719, "loss": 0.2037, "objective": 0.20009997487068176, "ranking_simple": 0.800000011920929, "regularize": 0.20009997487068176, "step": 1010, "wo_beta": 2.577218532562256 }, { "dpo_loss": 0.21036744117736816, "epoch": 2.8767123287671232, "grad_norm": 28.329606007932064, "learning_rate": 4.588754739795586e-09, "logits": -1.6348304748535156, "logps": -122.55280303955078, "loss": 0.2113, "objective": 0.21036744117736816, "ranking_simple": 0.8083333373069763, "regularize": 0.21036744117736816, "step": 1015, "wo_beta": 2.199587345123291 }, { "dpo_loss": 0.21461248397827148, "epoch": 2.8908833254605573, "grad_norm": 24.62375043047653, "learning_rate": 3.53903250453047e-09, "logits": -1.5652154684066772, "logps": -125.25798797607422, "loss": 0.2252, "objective": 0.21461248397827148, "ranking_simple": 0.7708333134651184, "regularize": 0.21461248397827148, "step": 1020, "wo_beta": 3.251589059829712 }, { "dpo_loss": 0.23392058908939362, "epoch": 2.9050543221539913, "grad_norm": 25.1548357787363, "learning_rate": 2.6250377406467627e-09, "logits": -1.6649322509765625, "logps": -124.19398498535156, "loss": 0.2291, "objective": 0.23392058908939362, "ranking_simple": 0.8083333373069763, "regularize": 0.23392058908939362, "step": 1025, "wo_beta": 2.934882879257202 }, { "dpo_loss": 0.19529716670513153, "epoch": 2.9192253188474258, "grad_norm": 23.978894569817292, "learning_rate": 1.8470203251865768e-09, "logits": -1.7404934167861938, "logps": -123.60317993164062, "loss": 0.2177, "objective": 0.19529716670513153, "ranking_simple": 0.7666666507720947, "regularize": 0.19529716670513153, "step": 1030, "wo_beta": 2.584693670272827 }, { "dpo_loss": 0.19891007244586945, "epoch": 2.9333963155408598, "grad_norm": 22.489637587674896, "learning_rate": 1.2051929603428823e-09, "logits": -1.6503539085388184, "logps": -126.50735473632812, "loss": 0.2124, "objective": 0.19891007244586945, "ranking_simple": 0.8125, "regularize": 0.19891007244586945, "step": 1035, "wo_beta": 2.176048755645752 }, { "dpo_loss": 0.21993538737297058, "epoch": 2.947567312234294, "grad_norm": 23.591827890954175, "learning_rate": 6.997311153086882e-10, "logits": -1.6743519306182861, "logps": -125.27057647705078, "loss": 0.2151, "objective": 0.21993538737297058, "ranking_simple": 0.800000011920929, "regularize": 0.21993538737297058, "step": 1040, "wo_beta": 3.0266594886779785 }, { "dpo_loss": 0.22178266942501068, "epoch": 2.961738308927728, "grad_norm": 26.41640596804321, "learning_rate": 3.3077297830541585e-10, "logits": -1.6230467557907104, "logps": -128.82781982421875, "loss": 0.2256, "objective": 0.22178266942501068, "ranking_simple": 0.8041666746139526, "regularize": 0.22178266942501068, "step": 1045, "wo_beta": 3.258082866668701 }, { "dpo_loss": 0.21878859400749207, "epoch": 2.975909305621162, "grad_norm": 25.001515767941786, "learning_rate": 9.841941880361914e-11, "logits": -1.6594524383544922, "logps": -121.61707305908203, "loss": 0.2221, "objective": 0.21878859400749207, "ranking_simple": 0.7958333492279053, "regularize": 0.21878859400749207, "step": 1050, "wo_beta": 2.6373982429504395 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.8653033375740051, "eval_logits": -1.816504955291748, "eval_logps": -132.85667419433594, "eval_loss": 0.8493290543556213, "eval_objective": 0.8653033375740051, "eval_ranking_simple": 0.534679114818573, "eval_regularize": 0.8653033375740051, "eval_runtime": 369.7292, "eval_samples_per_second": 15.66, "eval_steps_per_second": 1.306, "eval_wo_beta": 10.94184398651123, "step": 1050 }, { "dpo_loss": 0.22742925584316254, "epoch": 2.9900803023145963, "grad_norm": 25.264967316936268, "learning_rate": 2.7339599464326622e-12, "logits": -1.6433926820755005, "logps": -123.5830078125, "loss": 0.2221, "objective": 0.22742925584316254, "ranking_simple": 0.800000011920929, "regularize": 0.22742925584316254, "step": 1055, "wo_beta": 2.7176921367645264 }, { "epoch": 2.992914501653283, "step": 1056, "total_flos": 0.0, "train_loss": 0.37872021553586377, "train_runtime": 34657.2213, "train_samples_per_second": 4.398, "train_steps_per_second": 0.03 } ], "logging_steps": 5, "max_steps": 1056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }