|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.992914501653283, |
|
"eval_steps": 50, |
|
"global_step": 1056, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 18.39703870091124, |
|
"learning_rate": 9.433962264150943e-09, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 0.6931, |
|
"objective": 0.6931471824645996, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6931471824645996, |
|
"step": 1, |
|
"wo_beta": 5.2708282470703125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930365562438965, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 18.759209504732585, |
|
"learning_rate": 4.7169811320754715e-08, |
|
"logits": -1.429247498512268, |
|
"logps": -83.84073638916016, |
|
"loss": 0.6931, |
|
"objective": 0.6930365562438965, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.6930365562438965, |
|
"step": 5, |
|
"wo_beta": 7.670312404632568 |
|
}, |
|
{ |
|
"dpo_loss": 0.6933786869049072, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 20.439115048712264, |
|
"learning_rate": 9.433962264150943e-08, |
|
"logits": -1.40127694606781, |
|
"logps": -84.69605255126953, |
|
"loss": 0.6932, |
|
"objective": 0.6933786869049072, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.6933786869049072, |
|
"step": 10, |
|
"wo_beta": 5.937962055206299 |
|
}, |
|
{ |
|
"dpo_loss": 0.6909440755844116, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 18.777743501771415, |
|
"learning_rate": 1.4150943396226414e-07, |
|
"logits": -1.5383602380752563, |
|
"logps": -84.21109771728516, |
|
"loss": 0.6921, |
|
"objective": 0.6909440755844116, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6909440755844116, |
|
"step": 15, |
|
"wo_beta": 6.64866828918457 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903365254402161, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 18.03474083688221, |
|
"learning_rate": 1.8867924528301886e-07, |
|
"logits": -1.3631871938705444, |
|
"logps": -83.19156646728516, |
|
"loss": 0.6904, |
|
"objective": 0.6903365254402161, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.6903365254402161, |
|
"step": 20, |
|
"wo_beta": 8.294000625610352 |
|
}, |
|
{ |
|
"dpo_loss": 0.6920856237411499, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 17.418659582335064, |
|
"learning_rate": 2.3584905660377358e-07, |
|
"logits": -1.3736237287521362, |
|
"logps": -83.09373474121094, |
|
"loss": 0.6904, |
|
"objective": 0.6920856237411499, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6920856237411499, |
|
"step": 25, |
|
"wo_beta": 6.352960109710693 |
|
}, |
|
{ |
|
"dpo_loss": 0.686029851436615, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 16.102325248569215, |
|
"learning_rate": 2.830188679245283e-07, |
|
"logits": -1.4392812252044678, |
|
"logps": -82.93743133544922, |
|
"loss": 0.6866, |
|
"objective": 0.686029851436615, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.686029851436615, |
|
"step": 30, |
|
"wo_beta": 8.934405326843262 |
|
}, |
|
{ |
|
"dpo_loss": 0.692076563835144, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 17.863783183076404, |
|
"learning_rate": 3.30188679245283e-07, |
|
"logits": -1.4204154014587402, |
|
"logps": -82.21839904785156, |
|
"loss": 0.6871, |
|
"objective": 0.692076563835144, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.692076563835144, |
|
"step": 35, |
|
"wo_beta": 6.40147590637207 |
|
}, |
|
{ |
|
"dpo_loss": 0.68491530418396, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 19.094434967317397, |
|
"learning_rate": 3.773584905660377e-07, |
|
"logits": -1.4201196432113647, |
|
"logps": -82.76591491699219, |
|
"loss": 0.6809, |
|
"objective": 0.68491530418396, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.68491530418396, |
|
"step": 40, |
|
"wo_beta": 6.802278518676758 |
|
}, |
|
{ |
|
"dpo_loss": 0.681048572063446, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 19.32762735250478, |
|
"learning_rate": 4.2452830188679244e-07, |
|
"logits": -1.448870301246643, |
|
"logps": -82.9134292602539, |
|
"loss": 0.6834, |
|
"objective": 0.681048572063446, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.681048572063446, |
|
"step": 45, |
|
"wo_beta": 6.972365856170654 |
|
}, |
|
{ |
|
"dpo_loss": 0.676356852054596, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 18.06878353401125, |
|
"learning_rate": 4.7169811320754717e-07, |
|
"logits": -1.4718233346939087, |
|
"logps": -83.58888244628906, |
|
"loss": 0.6719, |
|
"objective": 0.676356852054596, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.676356852054596, |
|
"step": 50, |
|
"wo_beta": 7.712605953216553 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6879124045372009, |
|
"eval_logits": -1.4697412252426147, |
|
"eval_logps": -89.67761993408203, |
|
"eval_loss": 0.685627281665802, |
|
"eval_objective": 0.6879124045372009, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 0.6879124045372009, |
|
"eval_runtime": 368.3427, |
|
"eval_samples_per_second": 15.719, |
|
"eval_steps_per_second": 1.311, |
|
"eval_wo_beta": 7.922134876251221, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6702864766120911, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 19.335503669780774, |
|
"learning_rate": 5.188679245283019e-07, |
|
"logits": -1.4462206363677979, |
|
"logps": -83.2486801147461, |
|
"loss": 0.6744, |
|
"objective": 0.6702864766120911, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.6702864766120911, |
|
"step": 55, |
|
"wo_beta": 7.6053338050842285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6743167638778687, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 19.32608740622528, |
|
"learning_rate": 5.660377358490566e-07, |
|
"logits": -1.3781672716140747, |
|
"logps": -84.30441284179688, |
|
"loss": 0.6703, |
|
"objective": 0.6743167638778687, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6743167638778687, |
|
"step": 60, |
|
"wo_beta": 6.470597267150879 |
|
}, |
|
{ |
|
"dpo_loss": 0.6568139791488647, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 21.002392018495687, |
|
"learning_rate": 6.132075471698112e-07, |
|
"logits": -1.4774647951126099, |
|
"logps": -81.55781555175781, |
|
"loss": 0.6619, |
|
"objective": 0.6568139791488647, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6568139791488647, |
|
"step": 65, |
|
"wo_beta": 6.502650260925293 |
|
}, |
|
{ |
|
"dpo_loss": 0.6504150629043579, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 18.63585310886347, |
|
"learning_rate": 6.60377358490566e-07, |
|
"logits": -1.5209298133850098, |
|
"logps": -84.6080551147461, |
|
"loss": 0.6501, |
|
"objective": 0.6504150629043579, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.6504150629043579, |
|
"step": 70, |
|
"wo_beta": 6.957874774932861 |
|
}, |
|
{ |
|
"dpo_loss": 0.653372049331665, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 20.115959212821757, |
|
"learning_rate": 7.075471698113207e-07, |
|
"logits": -1.4559004306793213, |
|
"logps": -85.126953125, |
|
"loss": 0.6557, |
|
"objective": 0.653372049331665, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.653372049331665, |
|
"step": 75, |
|
"wo_beta": 6.480815410614014 |
|
}, |
|
{ |
|
"dpo_loss": 0.6524909734725952, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 23.881541586489003, |
|
"learning_rate": 7.547169811320754e-07, |
|
"logits": -1.567487120628357, |
|
"logps": -84.74055480957031, |
|
"loss": 0.6601, |
|
"objective": 0.6524909734725952, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6524909734725952, |
|
"step": 80, |
|
"wo_beta": 6.057122707366943 |
|
}, |
|
{ |
|
"dpo_loss": 0.6443823575973511, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 18.676003369955335, |
|
"learning_rate": 8.018867924528302e-07, |
|
"logits": -1.5786373615264893, |
|
"logps": -84.9271011352539, |
|
"loss": 0.6489, |
|
"objective": 0.6443823575973511, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6443823575973511, |
|
"step": 85, |
|
"wo_beta": 5.872949600219727 |
|
}, |
|
{ |
|
"dpo_loss": 0.6308037638664246, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 18.36712876918143, |
|
"learning_rate": 8.490566037735849e-07, |
|
"logits": -1.693374752998352, |
|
"logps": -85.01197052001953, |
|
"loss": 0.6429, |
|
"objective": 0.6308037638664246, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.6308037638664246, |
|
"step": 90, |
|
"wo_beta": 6.442193031311035 |
|
}, |
|
{ |
|
"dpo_loss": 0.6293771266937256, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 19.143294691787162, |
|
"learning_rate": 8.962264150943396e-07, |
|
"logits": -1.6244534254074097, |
|
"logps": -84.94630432128906, |
|
"loss": 0.6392, |
|
"objective": 0.6293771266937256, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.6293771266937256, |
|
"step": 95, |
|
"wo_beta": 7.510855197906494 |
|
}, |
|
{ |
|
"dpo_loss": 0.6378400921821594, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 18.923136932397636, |
|
"learning_rate": 9.433962264150943e-07, |
|
"logits": -1.531855821609497, |
|
"logps": -84.18727111816406, |
|
"loss": 0.6459, |
|
"objective": 0.6378400921821594, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.6378400921821594, |
|
"step": 100, |
|
"wo_beta": 6.881345748901367 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6793138384819031, |
|
"eval_logits": -1.6510542631149292, |
|
"eval_logps": -92.99544525146484, |
|
"eval_loss": 0.6764773726463318, |
|
"eval_objective": 0.6793138384819031, |
|
"eval_ranking_simple": 0.534679114818573, |
|
"eval_regularize": 0.6793138384819031, |
|
"eval_runtime": 367.8383, |
|
"eval_samples_per_second": 15.741, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 7.872693061828613, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6514045000076294, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 24.32830193794003, |
|
"learning_rate": 9.90566037735849e-07, |
|
"logits": -1.7460098266601562, |
|
"logps": -88.28630828857422, |
|
"loss": 0.6307, |
|
"objective": 0.6514045000076294, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.6514045000076294, |
|
"step": 105, |
|
"wo_beta": 6.015078067779541 |
|
}, |
|
{ |
|
"dpo_loss": 0.6186487674713135, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 21.425599904477778, |
|
"learning_rate": 9.99956257238817e-07, |
|
"logits": -1.6647683382034302, |
|
"logps": -88.7564468383789, |
|
"loss": 0.619, |
|
"objective": 0.6186487674713135, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.6186487674713135, |
|
"step": 110, |
|
"wo_beta": 6.501535892486572 |
|
}, |
|
{ |
|
"dpo_loss": 0.6382968425750732, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 23.57030350866866, |
|
"learning_rate": 9.997785653888834e-07, |
|
"logits": -1.6553956270217896, |
|
"logps": -88.90452575683594, |
|
"loss": 0.6244, |
|
"objective": 0.6382968425750732, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.6382968425750732, |
|
"step": 115, |
|
"wo_beta": 6.804749488830566 |
|
}, |
|
{ |
|
"dpo_loss": 0.613271176815033, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 17.498249311505813, |
|
"learning_rate": 9.994642390694308e-07, |
|
"logits": -1.6392256021499634, |
|
"logps": -87.69567108154297, |
|
"loss": 0.6212, |
|
"objective": 0.613271176815033, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.613271176815033, |
|
"step": 120, |
|
"wo_beta": 7.195651054382324 |
|
}, |
|
{ |
|
"dpo_loss": 0.6102784872055054, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 17.38350046383242, |
|
"learning_rate": 9.990133642141357e-07, |
|
"logits": -1.6804019212722778, |
|
"logps": -89.06767272949219, |
|
"loss": 0.6218, |
|
"objective": 0.6102784872055054, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.6102784872055054, |
|
"step": 125, |
|
"wo_beta": 7.266691207885742 |
|
}, |
|
{ |
|
"dpo_loss": 0.6158983111381531, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 18.419918693556507, |
|
"learning_rate": 9.98426064087682e-07, |
|
"logits": -1.6602026224136353, |
|
"logps": -86.5518569946289, |
|
"loss": 0.6238, |
|
"objective": 0.6158983111381531, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6158983111381531, |
|
"step": 130, |
|
"wo_beta": 5.715666770935059 |
|
}, |
|
{ |
|
"dpo_loss": 0.6510148644447327, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 21.943398704973582, |
|
"learning_rate": 9.977024992520601e-07, |
|
"logits": -1.7066783905029297, |
|
"logps": -88.5932846069336, |
|
"loss": 0.6289, |
|
"objective": 0.6510148644447327, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.6510148644447327, |
|
"step": 135, |
|
"wo_beta": 7.166342258453369 |
|
}, |
|
{ |
|
"dpo_loss": 0.6169579029083252, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 19.9956879065625, |
|
"learning_rate": 9.968428675226713e-07, |
|
"logits": -1.6791799068450928, |
|
"logps": -89.6558837890625, |
|
"loss": 0.6144, |
|
"objective": 0.6169579029083252, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.6169579029083252, |
|
"step": 140, |
|
"wo_beta": 6.211281776428223 |
|
}, |
|
{ |
|
"dpo_loss": 0.5698094964027405, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 20.71491947708098, |
|
"learning_rate": 9.958474039142469e-07, |
|
"logits": -1.6847442388534546, |
|
"logps": -89.56360626220703, |
|
"loss": 0.5875, |
|
"objective": 0.5698094964027405, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.5698094964027405, |
|
"step": 145, |
|
"wo_beta": 5.130297660827637 |
|
}, |
|
{ |
|
"dpo_loss": 0.6153029203414917, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 19.57556543746145, |
|
"learning_rate": 9.947163805765979e-07, |
|
"logits": -1.70354425907135, |
|
"logps": -89.73717498779297, |
|
"loss": 0.5993, |
|
"objective": 0.6153029203414917, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.6153029203414917, |
|
"step": 150, |
|
"wo_beta": 8.4924955368042 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6804503202438354, |
|
"eval_logits": -1.6963125467300415, |
|
"eval_logps": -95.27294921875, |
|
"eval_loss": 0.6770597696304321, |
|
"eval_objective": 0.6804503202438354, |
|
"eval_ranking_simple": 0.534679114818573, |
|
"eval_regularize": 0.6804503202438354, |
|
"eval_runtime": 370.2983, |
|
"eval_samples_per_second": 15.636, |
|
"eval_steps_per_second": 1.304, |
|
"eval_wo_beta": 8.2155179977417, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.59377521276474, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 23.61683273964934, |
|
"learning_rate": 9.934501067202117e-07, |
|
"logits": -1.7478511333465576, |
|
"logps": -87.8529052734375, |
|
"loss": 0.5935, |
|
"objective": 0.59377521276474, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.59377521276474, |
|
"step": 155, |
|
"wo_beta": 7.3897318840026855 |
|
}, |
|
{ |
|
"dpo_loss": 0.589878261089325, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 19.912032474590337, |
|
"learning_rate": 9.92048928531717e-07, |
|
"logits": -1.6176892518997192, |
|
"logps": -88.71855926513672, |
|
"loss": 0.5812, |
|
"objective": 0.589878261089325, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.589878261089325, |
|
"step": 160, |
|
"wo_beta": 6.206328868865967 |
|
}, |
|
{ |
|
"dpo_loss": 0.5920292735099792, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 21.374874744599914, |
|
"learning_rate": 9.905132290792392e-07, |
|
"logits": -1.745171308517456, |
|
"logps": -91.21366119384766, |
|
"loss": 0.5859, |
|
"objective": 0.5920292735099792, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.5920292735099792, |
|
"step": 165, |
|
"wo_beta": 6.208406448364258 |
|
}, |
|
{ |
|
"dpo_loss": 0.6067599654197693, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 19.998053262503415, |
|
"learning_rate": 9.888434282076757e-07, |
|
"logits": -1.8083672523498535, |
|
"logps": -91.927734375, |
|
"loss": 0.5862, |
|
"objective": 0.6067599654197693, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.6067599654197693, |
|
"step": 170, |
|
"wo_beta": 7.094420433044434 |
|
}, |
|
{ |
|
"dpo_loss": 0.5829775929450989, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 22.779470361992754, |
|
"learning_rate": 9.870399824239114e-07, |
|
"logits": -1.6424000263214111, |
|
"logps": -91.81340789794922, |
|
"loss": 0.5844, |
|
"objective": 0.5829775929450989, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.5829775929450989, |
|
"step": 175, |
|
"wo_beta": 6.69417667388916 |
|
}, |
|
{ |
|
"dpo_loss": 0.5724970698356628, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 19.56386680488567, |
|
"learning_rate": 9.851033847720164e-07, |
|
"logits": -1.6553891897201538, |
|
"logps": -91.84220123291016, |
|
"loss": 0.5461, |
|
"objective": 0.5724970698356628, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.5724970698356628, |
|
"step": 180, |
|
"wo_beta": 6.825948238372803 |
|
}, |
|
{ |
|
"dpo_loss": 0.5537912845611572, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 23.895057754013074, |
|
"learning_rate": 9.83034164698452e-07, |
|
"logits": -1.6311272382736206, |
|
"logps": -90.77505493164062, |
|
"loss": 0.5628, |
|
"objective": 0.5537912845611572, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.5537912845611572, |
|
"step": 185, |
|
"wo_beta": 5.920953750610352 |
|
}, |
|
{ |
|
"dpo_loss": 0.5373654961585999, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 23.921385246249166, |
|
"learning_rate": 9.808328879073251e-07, |
|
"logits": -1.727196216583252, |
|
"logps": -92.44843292236328, |
|
"loss": 0.5475, |
|
"objective": 0.5373654961585999, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.5373654961585999, |
|
"step": 190, |
|
"wo_beta": 5.952234745025635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5564671754837036, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 24.71069656183637, |
|
"learning_rate": 9.78500156205731e-07, |
|
"logits": -1.7691571712493896, |
|
"logps": -102.49636840820312, |
|
"loss": 0.5838, |
|
"objective": 0.5564671754837036, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.5564671754837036, |
|
"step": 195, |
|
"wo_beta": 6.274513244628906 |
|
}, |
|
{ |
|
"dpo_loss": 0.5659002661705017, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 18.721765242615913, |
|
"learning_rate": 9.760366073392244e-07, |
|
"logits": -1.7705143690109253, |
|
"logps": -108.12271118164062, |
|
"loss": 0.5557, |
|
"objective": 0.5659002661705017, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.5659002661705017, |
|
"step": 200, |
|
"wo_beta": 6.690428733825684 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6866137981414795, |
|
"eval_logits": -1.8150068521499634, |
|
"eval_logps": -115.46802520751953, |
|
"eval_loss": 0.6857941150665283, |
|
"eval_objective": 0.6866137981414795, |
|
"eval_ranking_simple": 0.5295031070709229, |
|
"eval_regularize": 0.6866137981414795, |
|
"eval_runtime": 369.0235, |
|
"eval_samples_per_second": 15.69, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 7.96071720123291, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5478584170341492, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 21.07102836538459, |
|
"learning_rate": 9.734429148174674e-07, |
|
"logits": -1.6568210124969482, |
|
"logps": -108.47331237792969, |
|
"loss": 0.545, |
|
"objective": 0.5478584170341492, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5478584170341492, |
|
"step": 205, |
|
"wo_beta": 7.015623092651367 |
|
}, |
|
{ |
|
"dpo_loss": 0.5228937268257141, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 20.651776585733355, |
|
"learning_rate": 9.707197877300973e-07, |
|
"logits": -1.7239099740982056, |
|
"logps": -108.58413696289062, |
|
"loss": 0.5299, |
|
"objective": 0.5228937268257141, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.5228937268257141, |
|
"step": 210, |
|
"wo_beta": 5.675539970397949 |
|
}, |
|
{ |
|
"dpo_loss": 0.5450037121772766, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 23.006204591176342, |
|
"learning_rate": 9.678679705528698e-07, |
|
"logits": -1.818426251411438, |
|
"logps": -108.85588073730469, |
|
"loss": 0.5504, |
|
"objective": 0.5450037121772766, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.5450037121772766, |
|
"step": 215, |
|
"wo_beta": 6.871977806091309 |
|
}, |
|
{ |
|
"dpo_loss": 0.5016953945159912, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 21.55242582954206, |
|
"learning_rate": 9.648882429441256e-07, |
|
"logits": -1.796720266342163, |
|
"logps": -106.0498046875, |
|
"loss": 0.5337, |
|
"objective": 0.5016953945159912, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.5016953945159912, |
|
"step": 220, |
|
"wo_beta": 6.975856304168701 |
|
}, |
|
{ |
|
"dpo_loss": 0.5535920262336731, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 18.336769425586553, |
|
"learning_rate": 9.61781419531641e-07, |
|
"logits": -1.8808425664901733, |
|
"logps": -103.36882781982422, |
|
"loss": 0.5444, |
|
"objective": 0.5535920262336731, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.5535920262336731, |
|
"step": 225, |
|
"wo_beta": 6.945814609527588 |
|
}, |
|
{ |
|
"dpo_loss": 0.5067029595375061, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 20.395537847593562, |
|
"learning_rate": 9.585483496899149e-07, |
|
"logits": -1.781941533088684, |
|
"logps": -102.78409576416016, |
|
"loss": 0.5275, |
|
"objective": 0.5067029595375061, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.5067029595375061, |
|
"step": 230, |
|
"wo_beta": 4.95630407333374 |
|
}, |
|
{ |
|
"dpo_loss": 0.5263319611549377, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 24.579908080459226, |
|
"learning_rate": 9.551899173079606e-07, |
|
"logits": -1.7018815279006958, |
|
"logps": -107.19641876220703, |
|
"loss": 0.5235, |
|
"objective": 0.5263319611549377, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.5263319611549377, |
|
"step": 235, |
|
"wo_beta": 6.136026859283447 |
|
}, |
|
{ |
|
"dpo_loss": 0.5419639945030212, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 25.457069150013837, |
|
"learning_rate": 9.517070405476574e-07, |
|
"logits": -1.8716365098953247, |
|
"logps": -109.35581970214844, |
|
"loss": 0.5391, |
|
"objective": 0.5419639945030212, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.5419639945030212, |
|
"step": 240, |
|
"wo_beta": 7.386639595031738 |
|
}, |
|
{ |
|
"dpo_loss": 0.599401593208313, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 20.952521291073488, |
|
"learning_rate": 9.481006715927351e-07, |
|
"logits": -1.8737353086471558, |
|
"logps": -105.67475128173828, |
|
"loss": 0.5586, |
|
"objective": 0.599401593208313, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.599401593208313, |
|
"step": 245, |
|
"wo_beta": 6.23760986328125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5379212498664856, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 17.076599406497994, |
|
"learning_rate": 9.443717963884568e-07, |
|
"logits": -1.6231579780578613, |
|
"logps": -97.79161834716797, |
|
"loss": 0.5428, |
|
"objective": 0.5379212498664856, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.5379212498664856, |
|
"step": 250, |
|
"wo_beta": 5.470564842224121 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.674086332321167, |
|
"eval_logits": -1.8494712114334106, |
|
"eval_logps": -102.56678771972656, |
|
"eval_loss": 0.6744823455810547, |
|
"eval_objective": 0.674086332321167, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 0.674086332321167, |
|
"eval_runtime": 369.2355, |
|
"eval_samples_per_second": 15.681, |
|
"eval_steps_per_second": 1.308, |
|
"eval_wo_beta": 7.989133358001709, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.4980238378047943, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 18.645503031645394, |
|
"learning_rate": 9.405214343720706e-07, |
|
"logits": -1.8677008152008057, |
|
"logps": -94.05529022216797, |
|
"loss": 0.5176, |
|
"objective": 0.4980238378047943, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.4980238378047943, |
|
"step": 255, |
|
"wo_beta": 5.837521553039551 |
|
}, |
|
{ |
|
"dpo_loss": 0.5208728909492493, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 22.040310265005534, |
|
"learning_rate": 9.365506381941065e-07, |
|
"logits": -1.8145065307617188, |
|
"logps": -96.94599914550781, |
|
"loss": 0.5381, |
|
"objective": 0.5208728909492493, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.5208728909492493, |
|
"step": 260, |
|
"wo_beta": 6.736252784729004 |
|
}, |
|
{ |
|
"dpo_loss": 0.4980500638484955, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 23.39044199948042, |
|
"learning_rate": 9.32460493430591e-07, |
|
"logits": -1.7500866651535034, |
|
"logps": -96.90016174316406, |
|
"loss": 0.5023, |
|
"objective": 0.4980500638484955, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.4980500638484955, |
|
"step": 265, |
|
"wo_beta": 6.221852779388428 |
|
}, |
|
{ |
|
"dpo_loss": 0.5561876893043518, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 20.73702481860498, |
|
"learning_rate": 9.282521182862629e-07, |
|
"logits": -1.8606762886047363, |
|
"logps": -96.93506622314453, |
|
"loss": 0.5259, |
|
"objective": 0.5561876893043518, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.5561876893043518, |
|
"step": 270, |
|
"wo_beta": 6.527937412261963 |
|
}, |
|
{ |
|
"dpo_loss": 0.5460684299468994, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 19.98766091687606, |
|
"learning_rate": 9.239266632888658e-07, |
|
"logits": -1.697192668914795, |
|
"logps": -96.09431457519531, |
|
"loss": 0.5114, |
|
"objective": 0.5460684299468994, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.5460684299468994, |
|
"step": 275, |
|
"wo_beta": 6.768658638000488 |
|
}, |
|
{ |
|
"dpo_loss": 0.47327375411987305, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 21.02838944330306, |
|
"learning_rate": 9.194853109746072e-07, |
|
"logits": -1.8069101572036743, |
|
"logps": -97.56507873535156, |
|
"loss": 0.491, |
|
"objective": 0.47327375411987305, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.47327375411987305, |
|
"step": 280, |
|
"wo_beta": 5.885926723480225 |
|
}, |
|
{ |
|
"dpo_loss": 0.5277642607688904, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 22.871584835681148, |
|
"learning_rate": 9.14929275564863e-07, |
|
"logits": -1.8146883249282837, |
|
"logps": -99.09355926513672, |
|
"loss": 0.5087, |
|
"objective": 0.5277642607688904, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.5277642607688904, |
|
"step": 285, |
|
"wo_beta": 7.316993236541748 |
|
}, |
|
{ |
|
"dpo_loss": 0.43708011507987976, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 25.529916734053177, |
|
"learning_rate": 9.102598026342222e-07, |
|
"logits": -1.8029001951217651, |
|
"logps": -96.13628387451172, |
|
"loss": 0.4688, |
|
"objective": 0.43708011507987976, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.43708011507987976, |
|
"step": 290, |
|
"wo_beta": 4.752199649810791 |
|
}, |
|
{ |
|
"dpo_loss": 0.49055105447769165, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 26.014797277152265, |
|
"learning_rate": 9.0547816876996e-07, |
|
"logits": -1.8255597352981567, |
|
"logps": -96.79292297363281, |
|
"loss": 0.4938, |
|
"objective": 0.49055105447769165, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.49055105447769165, |
|
"step": 295, |
|
"wo_beta": 6.931264400482178 |
|
}, |
|
{ |
|
"dpo_loss": 0.4934200048446655, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 22.235912832826426, |
|
"learning_rate": 9.005856812230304e-07, |
|
"logits": -1.8234219551086426, |
|
"logps": -98.131103515625, |
|
"loss": 0.4987, |
|
"objective": 0.4934200048446655, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.4934200048446655, |
|
"step": 300, |
|
"wo_beta": 4.583770275115967 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.7202683091163635, |
|
"eval_logits": -1.9276690483093262, |
|
"eval_logps": -110.09490966796875, |
|
"eval_loss": 0.711872398853302, |
|
"eval_objective": 0.7202683091163635, |
|
"eval_ranking_simple": 0.5372670888900757, |
|
"eval_regularize": 0.7202683091163635, |
|
"eval_runtime": 369.1999, |
|
"eval_samples_per_second": 15.683, |
|
"eval_steps_per_second": 1.308, |
|
"eval_wo_beta": 8.926669120788574, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5209147334098816, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 21.742768863559323, |
|
"learning_rate": 8.955836775506775e-07, |
|
"logits": -1.8012293577194214, |
|
"logps": -102.54885864257812, |
|
"loss": 0.4739, |
|
"objective": 0.5209147334098816, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.5209147334098816, |
|
"step": 305, |
|
"wo_beta": 5.649385929107666 |
|
}, |
|
{ |
|
"dpo_loss": 0.4898562729358673, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 22.266286975377888, |
|
"learning_rate": 8.904735252507609e-07, |
|
"logits": -1.8041430711746216, |
|
"logps": -101.15433502197266, |
|
"loss": 0.488, |
|
"objective": 0.4898562729358673, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.4898562729358673, |
|
"step": 310, |
|
"wo_beta": 6.073896408081055 |
|
}, |
|
{ |
|
"dpo_loss": 0.4698447585105896, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 22.62826678390999, |
|
"learning_rate": 8.852566213878946e-07, |
|
"logits": -1.798747181892395, |
|
"logps": -101.55271911621094, |
|
"loss": 0.4839, |
|
"objective": 0.4698447585105896, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.4698447585105896, |
|
"step": 315, |
|
"wo_beta": 5.962237358093262 |
|
}, |
|
{ |
|
"dpo_loss": 0.4936090409755707, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 25.39576707145419, |
|
"learning_rate": 8.799343922115043e-07, |
|
"logits": -1.7406024932861328, |
|
"logps": -110.01341247558594, |
|
"loss": 0.5003, |
|
"objective": 0.4936090409755707, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.4936090409755707, |
|
"step": 320, |
|
"wo_beta": 4.6922502517700195 |
|
}, |
|
{ |
|
"dpo_loss": 0.464478075504303, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 22.63000678324253, |
|
"learning_rate": 8.745082927659046e-07, |
|
"logits": -1.8573758602142334, |
|
"logps": -106.48271179199219, |
|
"loss": 0.4601, |
|
"objective": 0.464478075504303, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.464478075504303, |
|
"step": 325, |
|
"wo_beta": 5.424474239349365 |
|
}, |
|
{ |
|
"dpo_loss": 0.4378047287464142, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 26.572088004567764, |
|
"learning_rate": 8.689798064925048e-07, |
|
"logits": -1.6883081197738647, |
|
"logps": -104.9384765625, |
|
"loss": 0.4724, |
|
"objective": 0.4378047287464142, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.4378047287464142, |
|
"step": 330, |
|
"wo_beta": 5.635779857635498 |
|
}, |
|
{ |
|
"dpo_loss": 0.4327344000339508, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 28.535014872524457, |
|
"learning_rate": 8.633504448242504e-07, |
|
"logits": -1.6694140434265137, |
|
"logps": -107.3168716430664, |
|
"loss": 0.4595, |
|
"objective": 0.4327344000339508, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.4327344000339508, |
|
"step": 335, |
|
"wo_beta": 5.270318508148193 |
|
}, |
|
{ |
|
"dpo_loss": 0.4696439504623413, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 29.414333709201422, |
|
"learning_rate": 8.576217467724127e-07, |
|
"logits": -1.7229362726211548, |
|
"logps": -106.86974334716797, |
|
"loss": 0.5044, |
|
"objective": 0.4696439504623413, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.4696439504623413, |
|
"step": 340, |
|
"wo_beta": 5.814812660217285 |
|
}, |
|
{ |
|
"dpo_loss": 0.43202081322669983, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 21.299611385252437, |
|
"learning_rate": 8.517952785058384e-07, |
|
"logits": -1.7784336805343628, |
|
"logps": -101.36027526855469, |
|
"loss": 0.4585, |
|
"objective": 0.43202081322669983, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.43202081322669983, |
|
"step": 345, |
|
"wo_beta": 4.479858875274658 |
|
}, |
|
{ |
|
"dpo_loss": 0.4188024699687958, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 20.8027959046757, |
|
"learning_rate": 8.458726329227747e-07, |
|
"logits": -1.6966959238052368, |
|
"logps": -99.99575805664062, |
|
"loss": 0.4599, |
|
"objective": 0.4188024699687958, |
|
"ranking_simple": 0.762499988079071, |
|
"regularize": 0.4188024699687958, |
|
"step": 350, |
|
"wo_beta": 4.140859603881836 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6912401914596558, |
|
"eval_logits": -1.8473907709121704, |
|
"eval_logps": -104.9832763671875, |
|
"eval_loss": 0.6885838508605957, |
|
"eval_objective": 0.6912401914596558, |
|
"eval_ranking_simple": 0.5351966619491577, |
|
"eval_regularize": 0.6912401914596558, |
|
"eval_runtime": 375.4224, |
|
"eval_samples_per_second": 15.423, |
|
"eval_steps_per_second": 1.287, |
|
"eval_wo_beta": 8.374881744384766, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.4273616671562195, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 19.801974743070023, |
|
"learning_rate": 8.398554292153865e-07, |
|
"logits": -1.9038132429122925, |
|
"logps": -97.71568298339844, |
|
"loss": 0.4198, |
|
"objective": 0.4273616671562195, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.4273616671562195, |
|
"step": 355, |
|
"wo_beta": 4.851585388183594 |
|
}, |
|
{ |
|
"dpo_loss": 0.3563633859157562, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 27.347143857547685, |
|
"learning_rate": 8.337453124270862e-07, |
|
"logits": -1.720730185508728, |
|
"logps": -104.91397094726562, |
|
"loss": 0.3668, |
|
"objective": 0.3563633859157562, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.3563633859157562, |
|
"step": 360, |
|
"wo_beta": 4.480281352996826 |
|
}, |
|
{ |
|
"dpo_loss": 0.34869903326034546, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 21.348824435063978, |
|
"learning_rate": 8.275439530027947e-07, |
|
"logits": -1.730131983757019, |
|
"logps": -111.44975280761719, |
|
"loss": 0.3663, |
|
"objective": 0.34869903326034546, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.34869903326034546, |
|
"step": 365, |
|
"wo_beta": 3.829502820968628 |
|
}, |
|
{ |
|
"dpo_loss": 0.3821018934249878, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 23.44162059864065, |
|
"learning_rate": 8.212530463322582e-07, |
|
"logits": -1.7056537866592407, |
|
"logps": -104.91122436523438, |
|
"loss": 0.3708, |
|
"objective": 0.3821018934249878, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.3821018934249878, |
|
"step": 370, |
|
"wo_beta": 5.17144250869751 |
|
}, |
|
{ |
|
"dpo_loss": 0.3470642864704132, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 21.682188897692722, |
|
"learning_rate": 8.148743122865463e-07, |
|
"logits": -1.7548179626464844, |
|
"logps": -104.17964172363281, |
|
"loss": 0.3665, |
|
"objective": 0.3470642864704132, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.3470642864704132, |
|
"step": 375, |
|
"wo_beta": 3.6793243885040283 |
|
}, |
|
{ |
|
"dpo_loss": 0.35725000500679016, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 22.228388769880265, |
|
"learning_rate": 8.084094947478554e-07, |
|
"logits": -1.7486780881881714, |
|
"logps": -107.26901245117188, |
|
"loss": 0.3593, |
|
"objective": 0.35725000500679016, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.35725000500679016, |
|
"step": 380, |
|
"wo_beta": 3.6982295513153076 |
|
}, |
|
{ |
|
"dpo_loss": 0.36463692784309387, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 21.586757686789323, |
|
"learning_rate": 8.018603611327504e-07, |
|
"logits": -1.6958861351013184, |
|
"logps": -106.55477905273438, |
|
"loss": 0.3642, |
|
"objective": 0.36463692784309387, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.36463692784309387, |
|
"step": 385, |
|
"wo_beta": 4.191780090332031 |
|
}, |
|
{ |
|
"dpo_loss": 0.32307326793670654, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 21.44019149145441, |
|
"learning_rate": 7.952287019089685e-07, |
|
"logits": -1.6498711109161377, |
|
"logps": -109.88943481445312, |
|
"loss": 0.3408, |
|
"objective": 0.32307326793670654, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.32307326793670654, |
|
"step": 390, |
|
"wo_beta": 4.2303996086120605 |
|
}, |
|
{ |
|
"dpo_loss": 0.3521862328052521, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 25.820266993638842, |
|
"learning_rate": 7.88516330105925e-07, |
|
"logits": -1.726596713066101, |
|
"logps": -108.69520568847656, |
|
"loss": 0.347, |
|
"objective": 0.3521862328052521, |
|
"ranking_simple": 0.7291666865348816, |
|
"regularize": 0.3521862328052521, |
|
"step": 395, |
|
"wo_beta": 3.9551658630371094 |
|
}, |
|
{ |
|
"dpo_loss": 0.343461811542511, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 22.721697115040456, |
|
"learning_rate": 7.817250808190483e-07, |
|
"logits": -1.8199702501296997, |
|
"logps": -106.22103118896484, |
|
"loss": 0.3498, |
|
"objective": 0.343461811542511, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.343461811542511, |
|
"step": 400, |
|
"wo_beta": 3.51658034324646 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.751783549785614, |
|
"eval_logits": -1.8806556463241577, |
|
"eval_logps": -115.0888671875, |
|
"eval_loss": 0.7462870478630066, |
|
"eval_objective": 0.751783549785614, |
|
"eval_ranking_simple": 0.5517598390579224, |
|
"eval_regularize": 0.751783549785614, |
|
"eval_runtime": 370.3425, |
|
"eval_samples_per_second": 15.634, |
|
"eval_steps_per_second": 1.304, |
|
"eval_wo_beta": 9.550463676452637, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.3792721927165985, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 32.9676442992408, |
|
"learning_rate": 7.74856810708083e-07, |
|
"logits": -1.7736860513687134, |
|
"logps": -107.8192367553711, |
|
"loss": 0.3694, |
|
"objective": 0.3792721927165985, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.3792721927165985, |
|
"step": 405, |
|
"wo_beta": 5.114300727844238 |
|
}, |
|
{ |
|
"dpo_loss": 0.3683342933654785, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 21.343924046898124, |
|
"learning_rate": 7.679133974894982e-07, |
|
"logits": -1.7912460565567017, |
|
"logps": -105.35082244873047, |
|
"loss": 0.3582, |
|
"objective": 0.3683342933654785, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.3683342933654785, |
|
"step": 410, |
|
"wo_beta": 5.410634517669678 |
|
}, |
|
{ |
|
"dpo_loss": 0.3712156116962433, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 20.631633414155974, |
|
"learning_rate": 7.608967394231386e-07, |
|
"logits": -1.7320811748504639, |
|
"logps": -101.5705337524414, |
|
"loss": 0.3623, |
|
"objective": 0.3712156116962433, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.3712156116962433, |
|
"step": 415, |
|
"wo_beta": 5.24213171005249 |
|
}, |
|
{ |
|
"dpo_loss": 0.34116995334625244, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 21.634750653170887, |
|
"learning_rate": 7.538087547932584e-07, |
|
"logits": -1.67872953414917, |
|
"logps": -101.14533233642578, |
|
"loss": 0.359, |
|
"objective": 0.34116995334625244, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.34116995334625244, |
|
"step": 420, |
|
"wo_beta": 4.399470329284668 |
|
}, |
|
{ |
|
"dpo_loss": 0.3757858872413635, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 21.74672077991223, |
|
"learning_rate": 7.466513813840824e-07, |
|
"logits": -1.733936071395874, |
|
"logps": -99.8553466796875, |
|
"loss": 0.3472, |
|
"objective": 0.3757858872413635, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.3757858872413635, |
|
"step": 425, |
|
"wo_beta": 4.1172003746032715 |
|
}, |
|
{ |
|
"dpo_loss": 0.3544313311576843, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 25.498669816549643, |
|
"learning_rate": 7.394265759500347e-07, |
|
"logits": -1.7176556587219238, |
|
"logps": -105.41299438476562, |
|
"loss": 0.3569, |
|
"objective": 0.3544313311576843, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.3544313311576843, |
|
"step": 430, |
|
"wo_beta": 4.10382604598999 |
|
}, |
|
{ |
|
"dpo_loss": 0.35303547978401184, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 24.236679665592966, |
|
"learning_rate": 7.321363136807818e-07, |
|
"logits": -1.6603659391403198, |
|
"logps": -107.16277313232422, |
|
"loss": 0.3778, |
|
"objective": 0.35303547978401184, |
|
"ranking_simple": 0.75, |
|
"regularize": 0.35303547978401184, |
|
"step": 435, |
|
"wo_beta": 3.8118536472320557 |
|
}, |
|
{ |
|
"dpo_loss": 0.32170751690864563, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 25.528958844465567, |
|
"learning_rate": 7.247825876612352e-07, |
|
"logits": -1.6398621797561646, |
|
"logps": -105.9729995727539, |
|
"loss": 0.3429, |
|
"objective": 0.32170751690864563, |
|
"ranking_simple": 0.762499988079071, |
|
"regularize": 0.32170751690864563, |
|
"step": 440, |
|
"wo_beta": 3.466229200363159 |
|
}, |
|
{ |
|
"dpo_loss": 0.35508811473846436, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 20.894838905133128, |
|
"learning_rate": 7.173674083266623e-07, |
|
"logits": -1.6645927429199219, |
|
"logps": -106.34298706054688, |
|
"loss": 0.3613, |
|
"objective": 0.35508811473846436, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.35508811473846436, |
|
"step": 445, |
|
"wo_beta": 4.097968101501465 |
|
}, |
|
{ |
|
"dpo_loss": 0.32759609818458557, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 21.051410520730908, |
|
"learning_rate": 7.098928029130528e-07, |
|
"logits": -1.8029848337173462, |
|
"logps": -107.71712493896484, |
|
"loss": 0.3361, |
|
"objective": 0.32759609818458557, |
|
"ranking_simple": 0.7583333253860474, |
|
"regularize": 0.32759609818458557, |
|
"step": 450, |
|
"wo_beta": 3.633894443511963 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.7672637701034546, |
|
"eval_logits": -1.8356177806854248, |
|
"eval_logps": -116.80036926269531, |
|
"eval_loss": 0.7562825679779053, |
|
"eval_objective": 0.7672637701034546, |
|
"eval_ranking_simple": 0.5419254899024963, |
|
"eval_regularize": 0.7672637701034546, |
|
"eval_runtime": 367.9761, |
|
"eval_samples_per_second": 15.735, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 9.725224494934082, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.3631579279899597, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 27.512782619434457, |
|
"learning_rate": 7.023608149028936e-07, |
|
"logits": -1.6034198999404907, |
|
"logps": -110.81619262695312, |
|
"loss": 0.3689, |
|
"objective": 0.3631579279899597, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.3631579279899597, |
|
"step": 455, |
|
"wo_beta": 4.727202892303467 |
|
}, |
|
{ |
|
"dpo_loss": 0.3907562792301178, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 24.168580409550643, |
|
"learning_rate": 6.947735034665001e-07, |
|
"logits": -1.7300904989242554, |
|
"logps": -108.0628890991211, |
|
"loss": 0.3712, |
|
"objective": 0.3907562792301178, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.3907562792301178, |
|
"step": 460, |
|
"wo_beta": 5.317975997924805 |
|
}, |
|
{ |
|
"dpo_loss": 0.3442947268486023, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 22.349252886711223, |
|
"learning_rate": 6.871329428990601e-07, |
|
"logits": -1.7600762844085693, |
|
"logps": -102.89717102050781, |
|
"loss": 0.366, |
|
"objective": 0.3442947268486023, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.3442947268486023, |
|
"step": 465, |
|
"wo_beta": 4.05583381652832 |
|
}, |
|
{ |
|
"dpo_loss": 0.36818769574165344, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 21.171077905427786, |
|
"learning_rate": 6.794412220535425e-07, |
|
"logits": -1.8482578992843628, |
|
"logps": -100.47290802001953, |
|
"loss": 0.3612, |
|
"objective": 0.36818769574165344, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.36818769574165344, |
|
"step": 470, |
|
"wo_beta": 3.844135046005249 |
|
}, |
|
{ |
|
"dpo_loss": 0.3290488123893738, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 23.17750388094903, |
|
"learning_rate": 6.717004437696249e-07, |
|
"logits": -1.6789878606796265, |
|
"logps": -102.9361572265625, |
|
"loss": 0.3486, |
|
"objective": 0.3290488123893738, |
|
"ranking_simple": 0.7541666626930237, |
|
"regularize": 0.3290488123893738, |
|
"step": 475, |
|
"wo_beta": 3.849888324737549 |
|
}, |
|
{ |
|
"dpo_loss": 0.3331288993358612, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 31.57784191234236, |
|
"learning_rate": 6.639127242987987e-07, |
|
"logits": -1.7997510433197021, |
|
"logps": -105.81382751464844, |
|
"loss": 0.3418, |
|
"objective": 0.3331288993358612, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.3331288993358612, |
|
"step": 480, |
|
"wo_beta": 5.502153396606445 |
|
}, |
|
{ |
|
"dpo_loss": 0.3518763482570648, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 25.72021920875409, |
|
"learning_rate": 6.560801927258079e-07, |
|
"logits": -1.763397455215454, |
|
"logps": -105.35774993896484, |
|
"loss": 0.3606, |
|
"objective": 0.3518763482570648, |
|
"ranking_simple": 0.7416666746139526, |
|
"regularize": 0.3518763482570648, |
|
"step": 485, |
|
"wo_beta": 4.792428016662598 |
|
}, |
|
{ |
|
"dpo_loss": 0.3617197871208191, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 22.21535306569678, |
|
"learning_rate": 6.482049903865768e-07, |
|
"logits": -1.7518786191940308, |
|
"logps": -109.02607727050781, |
|
"loss": 0.3414, |
|
"objective": 0.3617197871208191, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.3617197871208191, |
|
"step": 490, |
|
"wo_beta": 3.807429790496826 |
|
}, |
|
{ |
|
"dpo_loss": 0.33107537031173706, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 28.27182569867929, |
|
"learning_rate": 6.402892702827916e-07, |
|
"logits": -1.7524651288986206, |
|
"logps": -110.29097747802734, |
|
"loss": 0.3453, |
|
"objective": 0.33107537031173706, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.33107537031173706, |
|
"step": 495, |
|
"wo_beta": 4.099748611450195 |
|
}, |
|
{ |
|
"dpo_loss": 0.3612217307090759, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 26.907643867295747, |
|
"learning_rate": 6.323351964932908e-07, |
|
"logits": -1.6837791204452515, |
|
"logps": -109.02066802978516, |
|
"loss": 0.3584, |
|
"objective": 0.3612217307090759, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.3612217307090759, |
|
"step": 500, |
|
"wo_beta": 3.91571307182312 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7694771885871887, |
|
"eval_logits": -1.8626275062561035, |
|
"eval_logps": -117.51673126220703, |
|
"eval_loss": 0.7635300159454346, |
|
"eval_objective": 0.7694771885871887, |
|
"eval_ranking_simple": 0.5419254899024963, |
|
"eval_regularize": 0.7694771885871887, |
|
"eval_runtime": 370.4437, |
|
"eval_samples_per_second": 15.63, |
|
"eval_steps_per_second": 1.304, |
|
"eval_wo_beta": 9.631916999816895, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.2975477874279022, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 22.51215190081199, |
|
"learning_rate": 6.243449435824276e-07, |
|
"logits": -1.7515524625778198, |
|
"logps": -109.30506134033203, |
|
"loss": 0.3308, |
|
"objective": 0.2975477874279022, |
|
"ranking_simple": 0.75, |
|
"regularize": 0.2975477874279022, |
|
"step": 505, |
|
"wo_beta": 3.399846076965332 |
|
}, |
|
{ |
|
"dpo_loss": 0.3239024877548218, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 25.756520666755268, |
|
"learning_rate": 6.163206960055652e-07, |
|
"logits": -1.7505611181259155, |
|
"logps": -108.20401000976562, |
|
"loss": 0.3502, |
|
"objective": 0.3239024877548218, |
|
"ranking_simple": 0.75, |
|
"regularize": 0.3239024877548218, |
|
"step": 510, |
|
"wo_beta": 3.145045042037964 |
|
}, |
|
{ |
|
"dpo_loss": 0.3271316587924957, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 22.00239796246049, |
|
"learning_rate": 6.082646475118699e-07, |
|
"logits": -1.8232632875442505, |
|
"logps": -107.82081604003906, |
|
"loss": 0.3356, |
|
"objective": 0.3271316587924957, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.3271316587924957, |
|
"step": 515, |
|
"wo_beta": 4.939964771270752 |
|
}, |
|
{ |
|
"dpo_loss": 0.30039647221565247, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 22.8094809920846, |
|
"learning_rate": 6.001790005445606e-07, |
|
"logits": -1.6817113161087036, |
|
"logps": -106.67537689208984, |
|
"loss": 0.3092, |
|
"objective": 0.30039647221565247, |
|
"ranking_simple": 0.7541666626930237, |
|
"regularize": 0.30039647221565247, |
|
"step": 520, |
|
"wo_beta": 3.6650784015655518 |
|
}, |
|
{ |
|
"dpo_loss": 0.34032100439071655, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 25.538184332619608, |
|
"learning_rate": 5.920659656387836e-07, |
|
"logits": -1.5860577821731567, |
|
"logps": -107.61659240722656, |
|
"loss": 0.3466, |
|
"objective": 0.34032100439071655, |
|
"ranking_simple": 0.75, |
|
"regularize": 0.34032100439071655, |
|
"step": 525, |
|
"wo_beta": 4.031210422515869 |
|
}, |
|
{ |
|
"dpo_loss": 0.3295021951198578, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 22.986849455924027, |
|
"learning_rate": 5.839277608172738e-07, |
|
"logits": -1.743402123451233, |
|
"logps": -111.25679016113281, |
|
"loss": 0.3262, |
|
"objective": 0.3295021951198578, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.3295021951198578, |
|
"step": 530, |
|
"wo_beta": 3.9925944805145264 |
|
}, |
|
{ |
|
"dpo_loss": 0.29601436853408813, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 25.044023904201577, |
|
"learning_rate": 5.757666109839702e-07, |
|
"logits": -1.7071605920791626, |
|
"logps": -112.61083984375, |
|
"loss": 0.3088, |
|
"objective": 0.29601436853408813, |
|
"ranking_simple": 0.7541666626930237, |
|
"regularize": 0.29601436853408813, |
|
"step": 535, |
|
"wo_beta": 3.535855531692505 |
|
}, |
|
{ |
|
"dpo_loss": 0.350533127784729, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 22.77917543570306, |
|
"learning_rate": 5.675847473157485e-07, |
|
"logits": -1.6420516967773438, |
|
"logps": -114.60887145996094, |
|
"loss": 0.338, |
|
"objective": 0.350533127784729, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.350533127784729, |
|
"step": 540, |
|
"wo_beta": 3.302687644958496 |
|
}, |
|
{ |
|
"dpo_loss": 0.31907784938812256, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 27.52281539562519, |
|
"learning_rate": 5.5938440665244e-07, |
|
"logits": -1.7368324995040894, |
|
"logps": -117.18190002441406, |
|
"loss": 0.3311, |
|
"objective": 0.31907784938812256, |
|
"ranking_simple": 0.7875000238418579, |
|
"regularize": 0.31907784938812256, |
|
"step": 545, |
|
"wo_beta": 2.7296268939971924 |
|
}, |
|
{ |
|
"dpo_loss": 0.3072589933872223, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 24.90909396316388, |
|
"learning_rate": 5.511678308853025e-07, |
|
"logits": -1.7334787845611572, |
|
"logps": -118.51893615722656, |
|
"loss": 0.3343, |
|
"objective": 0.3072589933872223, |
|
"ranking_simple": 0.7208333611488342, |
|
"regularize": 0.3072589933872223, |
|
"step": 550, |
|
"wo_beta": 3.902778148651123 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7814171314239502, |
|
"eval_logits": -1.8209288120269775, |
|
"eval_logps": -123.38627624511719, |
|
"eval_loss": 0.7698224186897278, |
|
"eval_objective": 0.7814171314239502, |
|
"eval_ranking_simple": 0.5351966619491577, |
|
"eval_regularize": 0.7814171314239502, |
|
"eval_runtime": 367.7587, |
|
"eval_samples_per_second": 15.744, |
|
"eval_steps_per_second": 1.313, |
|
"eval_wo_beta": 9.825753211975098, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.34100914001464844, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 24.299530028848974, |
|
"learning_rate": 5.429372663441085e-07, |
|
"logits": -1.5191646814346313, |
|
"logps": -116.5060806274414, |
|
"loss": 0.339, |
|
"objective": 0.34100914001464844, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.34100914001464844, |
|
"step": 555, |
|
"wo_beta": 3.8065884113311768 |
|
}, |
|
{ |
|
"dpo_loss": 0.3122609257698059, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 26.61780255104318, |
|
"learning_rate": 5.34694963183022e-07, |
|
"logits": -1.5900938510894775, |
|
"logps": -112.42914581298828, |
|
"loss": 0.334, |
|
"objective": 0.3122609257698059, |
|
"ranking_simple": 0.7666666507720947, |
|
"regularize": 0.3122609257698059, |
|
"step": 560, |
|
"wo_beta": 4.191233158111572 |
|
}, |
|
{ |
|
"dpo_loss": 0.3086826205253601, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 23.97171206395212, |
|
"learning_rate": 5.264431747654283e-07, |
|
"logits": -1.5865463018417358, |
|
"logps": -112.55115509033203, |
|
"loss": 0.334, |
|
"objective": 0.3086826205253601, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.3086826205253601, |
|
"step": 565, |
|
"wo_beta": 3.760244846343994 |
|
}, |
|
{ |
|
"dpo_loss": 0.3280898928642273, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 26.790227869518926, |
|
"learning_rate": 5.181841570478872e-07, |
|
"logits": -1.7293529510498047, |
|
"logps": -114.50617980957031, |
|
"loss": 0.3065, |
|
"objective": 0.3280898928642273, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.3280898928642273, |
|
"step": 570, |
|
"wo_beta": 4.093240737915039 |
|
}, |
|
{ |
|
"dpo_loss": 0.356030136346817, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 29.38280925874381, |
|
"learning_rate": 5.099201679633768e-07, |
|
"logits": -1.7021836042404175, |
|
"logps": -112.18016052246094, |
|
"loss": 0.3442, |
|
"objective": 0.356030136346817, |
|
"ranking_simple": 0.7458333373069763, |
|
"regularize": 0.356030136346817, |
|
"step": 575, |
|
"wo_beta": 3.8104941844940186 |
|
}, |
|
{ |
|
"dpo_loss": 0.2992390990257263, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 24.710654774900764, |
|
"learning_rate": 5.016534668039976e-07, |
|
"logits": -1.73283052444458, |
|
"logps": -111.73848724365234, |
|
"loss": 0.3113, |
|
"objective": 0.2992390990257263, |
|
"ranking_simple": 0.762499988079071, |
|
"regularize": 0.2992390990257263, |
|
"step": 580, |
|
"wo_beta": 2.8948888778686523 |
|
}, |
|
{ |
|
"dpo_loss": 0.30101045966148376, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 25.736134751129057, |
|
"learning_rate": 4.933863136033039e-07, |
|
"logits": -1.5684159994125366, |
|
"logps": -111.7412109375, |
|
"loss": 0.3245, |
|
"objective": 0.30101045966148376, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.30101045966148376, |
|
"step": 585, |
|
"wo_beta": 3.5469541549682617 |
|
}, |
|
{ |
|
"dpo_loss": 0.29352518916130066, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 28.292224202430326, |
|
"learning_rate": 4.851209685184338e-07, |
|
"logits": -1.6094284057617188, |
|
"logps": -114.35250854492188, |
|
"loss": 0.3264, |
|
"objective": 0.29352518916130066, |
|
"ranking_simple": 0.7458333373069763, |
|
"regularize": 0.29352518916130066, |
|
"step": 590, |
|
"wo_beta": 3.287980794906616 |
|
}, |
|
{ |
|
"dpo_loss": 0.32477742433547974, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 26.952229320357798, |
|
"learning_rate": 4.768596912122045e-07, |
|
"logits": -1.5896912813186646, |
|
"logps": -114.52155303955078, |
|
"loss": 0.3252, |
|
"objective": 0.32477742433547974, |
|
"ranking_simple": 0.7416666746139526, |
|
"regularize": 0.32477742433547974, |
|
"step": 595, |
|
"wo_beta": 4.611125469207764 |
|
}, |
|
{ |
|
"dpo_loss": 0.2708142399787903, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 26.06064163090054, |
|
"learning_rate": 4.686047402353433e-07, |
|
"logits": -1.6145151853561401, |
|
"logps": -114.16989135742188, |
|
"loss": 0.3105, |
|
"objective": 0.2708142399787903, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.2708142399787903, |
|
"step": 600, |
|
"wo_beta": 3.207538604736328 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.7761210799217224, |
|
"eval_logits": -1.7865931987762451, |
|
"eval_logps": -119.82308959960938, |
|
"eval_loss": 0.7679409980773926, |
|
"eval_objective": 0.7761210799217224, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.7761210799217224, |
|
"eval_runtime": 372.6791, |
|
"eval_samples_per_second": 15.536, |
|
"eval_steps_per_second": 1.296, |
|
"eval_wo_beta": 9.803099632263184, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.32439085841178894, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 25.8504927528126, |
|
"learning_rate": 4.60358372409022e-07, |
|
"logits": -1.602834701538086, |
|
"logps": -112.0418472290039, |
|
"loss": 0.3292, |
|
"objective": 0.32439085841178894, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.32439085841178894, |
|
"step": 605, |
|
"wo_beta": 4.832021713256836 |
|
}, |
|
{ |
|
"dpo_loss": 0.36444205045700073, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 26.17653629180954, |
|
"learning_rate": 4.521228422078649e-07, |
|
"logits": -1.6547633409500122, |
|
"logps": -110.03471374511719, |
|
"loss": 0.3239, |
|
"objective": 0.36444205045700073, |
|
"ranking_simple": 0.75, |
|
"regularize": 0.36444205045700073, |
|
"step": 610, |
|
"wo_beta": 4.15641975402832 |
|
}, |
|
{ |
|
"dpo_loss": 0.3479357659816742, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 25.45060113638119, |
|
"learning_rate": 4.439004011435979e-07, |
|
"logits": -1.6919087171554565, |
|
"logps": -109.51602172851562, |
|
"loss": 0.3279, |
|
"objective": 0.3479357659816742, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.3479357659816742, |
|
"step": 615, |
|
"wo_beta": 4.716867446899414 |
|
}, |
|
{ |
|
"dpo_loss": 0.33088982105255127, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 26.887147654271335, |
|
"learning_rate": 4.3569329714950703e-07, |
|
"logits": -1.7291035652160645, |
|
"logps": -109.82331085205078, |
|
"loss": 0.3425, |
|
"objective": 0.33088982105255127, |
|
"ranking_simple": 0.7583333253860474, |
|
"regularize": 0.33088982105255127, |
|
"step": 620, |
|
"wo_beta": 3.5512850284576416 |
|
}, |
|
{ |
|
"dpo_loss": 0.3090989291667938, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 26.389383350627952, |
|
"learning_rate": 4.275037739658771e-07, |
|
"logits": -1.590990424156189, |
|
"logps": -110.23678588867188, |
|
"loss": 0.3234, |
|
"objective": 0.3090989291667938, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.3090989291667938, |
|
"step": 625, |
|
"wo_beta": 3.666703462600708 |
|
}, |
|
{ |
|
"dpo_loss": 0.33155128359794617, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 29.336966725897334, |
|
"learning_rate": 4.193340705265745e-07, |
|
"logits": -1.635860562324524, |
|
"logps": -114.24417114257812, |
|
"loss": 0.3204, |
|
"objective": 0.33155128359794617, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.33155128359794617, |
|
"step": 630, |
|
"wo_beta": 4.568333625793457 |
|
}, |
|
{ |
|
"dpo_loss": 0.3347407877445221, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 31.313014707447742, |
|
"learning_rate": 4.1118642034694565e-07, |
|
"logits": -1.7257325649261475, |
|
"logps": -116.03430938720703, |
|
"loss": 0.3376, |
|
"objective": 0.3347407877445221, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.3347407877445221, |
|
"step": 635, |
|
"wo_beta": 4.352345943450928 |
|
}, |
|
{ |
|
"dpo_loss": 0.30713585019111633, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 25.70905071821337, |
|
"learning_rate": 4.030630509131959e-07, |
|
"logits": -1.6731877326965332, |
|
"logps": -116.4244155883789, |
|
"loss": 0.3098, |
|
"objective": 0.30713585019111633, |
|
"ranking_simple": 0.7583333253860474, |
|
"regularize": 0.30713585019111633, |
|
"step": 640, |
|
"wo_beta": 4.2585673332214355 |
|
}, |
|
{ |
|
"dpo_loss": 0.29875513911247253, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 23.791400324977506, |
|
"learning_rate": 3.9496618307341713e-07, |
|
"logits": -1.7462607622146606, |
|
"logps": -116.83419036865234, |
|
"loss": 0.3005, |
|
"objective": 0.29875513911247253, |
|
"ranking_simple": 0.7791666388511658, |
|
"regularize": 0.29875513911247253, |
|
"step": 645, |
|
"wo_beta": 3.3428430557250977 |
|
}, |
|
{ |
|
"dpo_loss": 0.32320088148117065, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 32.03839388955998, |
|
"learning_rate": 3.8689803043042996e-07, |
|
"logits": -1.8062669038772583, |
|
"logps": -117.27250671386719, |
|
"loss": 0.3412, |
|
"objective": 0.32320088148117065, |
|
"ranking_simple": 0.7583333253860474, |
|
"regularize": 0.32320088148117065, |
|
"step": 650, |
|
"wo_beta": 3.019827127456665 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.7847943305969238, |
|
"eval_logits": -1.832274079322815, |
|
"eval_logps": -122.29435729980469, |
|
"eval_loss": 0.7749964594841003, |
|
"eval_objective": 0.7847943305969238, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.7847943305969238, |
|
"eval_runtime": 369.3337, |
|
"eval_samples_per_second": 15.677, |
|
"eval_steps_per_second": 1.308, |
|
"eval_wo_beta": 9.94935417175293, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.3198917508125305, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 24.453465336538663, |
|
"learning_rate": 3.788607987366069e-07, |
|
"logits": -1.6524808406829834, |
|
"logps": -113.18647766113281, |
|
"loss": 0.333, |
|
"objective": 0.3198917508125305, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.3198917508125305, |
|
"step": 655, |
|
"wo_beta": 3.779681444168091 |
|
}, |
|
{ |
|
"dpo_loss": 0.30827051401138306, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 25.30249944675523, |
|
"learning_rate": 3.708566852908418e-07, |
|
"logits": -1.722990870475769, |
|
"logps": -117.13253021240234, |
|
"loss": 0.3093, |
|
"objective": 0.30827051401138306, |
|
"ranking_simple": 0.7250000238418579, |
|
"regularize": 0.30827051401138306, |
|
"step": 660, |
|
"wo_beta": 3.8630990982055664 |
|
}, |
|
{ |
|
"dpo_loss": 0.30362004041671753, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 26.577906210584395, |
|
"learning_rate": 3.6288787833783016e-07, |
|
"logits": -1.6925681829452515, |
|
"logps": -114.92183685302734, |
|
"loss": 0.3287, |
|
"objective": 0.30362004041671753, |
|
"ranking_simple": 0.7749999761581421, |
|
"regularize": 0.30362004041671753, |
|
"step": 665, |
|
"wo_beta": 3.2407257556915283 |
|
}, |
|
{ |
|
"dpo_loss": 0.33854812383651733, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 26.608491612605135, |
|
"learning_rate": 3.5495655646982503e-07, |
|
"logits": -1.632662057876587, |
|
"logps": -113.74718475341797, |
|
"loss": 0.3206, |
|
"objective": 0.33854812383651733, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.33854812383651733, |
|
"step": 670, |
|
"wo_beta": 4.751885890960693 |
|
}, |
|
{ |
|
"dpo_loss": 0.30450791120529175, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 27.707963800825, |
|
"learning_rate": 3.470648880310313e-07, |
|
"logits": -1.7001540660858154, |
|
"logps": -112.93498992919922, |
|
"loss": 0.3286, |
|
"objective": 0.30450791120529175, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.30450791120529175, |
|
"step": 675, |
|
"wo_beta": 2.8013789653778076 |
|
}, |
|
{ |
|
"dpo_loss": 0.29233846068382263, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 26.318074274927174, |
|
"learning_rate": 3.3921503052480236e-07, |
|
"logits": -1.7435904741287231, |
|
"logps": -115.07746887207031, |
|
"loss": 0.3108, |
|
"objective": 0.29233846068382263, |
|
"ranking_simple": 0.7833333611488342, |
|
"regularize": 0.29233846068382263, |
|
"step": 680, |
|
"wo_beta": 3.6125741004943848 |
|
}, |
|
{ |
|
"dpo_loss": 0.2992376685142517, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 26.980724381093548, |
|
"learning_rate": 3.314091300237999e-07, |
|
"logits": -1.6790062189102173, |
|
"logps": -115.0765380859375, |
|
"loss": 0.2818, |
|
"objective": 0.2992376685142517, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.2992376685142517, |
|
"step": 685, |
|
"wo_beta": 4.020833969116211 |
|
}, |
|
{ |
|
"dpo_loss": 0.2989169955253601, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 28.956498459281256, |
|
"learning_rate": 3.236493205832794e-07, |
|
"logits": -1.7138378620147705, |
|
"logps": -116.5325927734375, |
|
"loss": 0.3112, |
|
"objective": 0.2989169955253601, |
|
"ranking_simple": 0.762499988079071, |
|
"regularize": 0.2989169955253601, |
|
"step": 690, |
|
"wo_beta": 3.235595941543579 |
|
}, |
|
{ |
|
"dpo_loss": 0.3340277373790741, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 29.008978725819244, |
|
"learning_rate": 3.15937723657661e-07, |
|
"logits": -1.5779744386672974, |
|
"logps": -117.33056640625, |
|
"loss": 0.3178, |
|
"objective": 0.3340277373790741, |
|
"ranking_simple": 0.7416666746139526, |
|
"regularize": 0.3340277373790741, |
|
"step": 695, |
|
"wo_beta": 4.477265357971191 |
|
}, |
|
{ |
|
"dpo_loss": 0.3072899878025055, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 26.91852361479357, |
|
"learning_rate": 3.082764475205442e-07, |
|
"logits": -1.5716139078140259, |
|
"logps": -120.74311065673828, |
|
"loss": 0.3156, |
|
"objective": 0.3072899878025055, |
|
"ranking_simple": 0.7583333253860474, |
|
"regularize": 0.3072899878025055, |
|
"step": 700, |
|
"wo_beta": 3.845552682876587 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.813927948474884, |
|
"eval_logits": -1.8338414430618286, |
|
"eval_logps": -126.39391326904297, |
|
"eval_loss": 0.8013313412666321, |
|
"eval_objective": 0.813927948474884, |
|
"eval_ranking_simple": 0.5377846956253052, |
|
"eval_regularize": 0.813927948474884, |
|
"eval_runtime": 375.3584, |
|
"eval_samples_per_second": 15.425, |
|
"eval_steps_per_second": 1.287, |
|
"eval_wo_beta": 10.32465934753418, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.3367713391780853, |
|
"epoch": 1.9981105337742089, |
|
"grad_norm": 30.634677985646626, |
|
"learning_rate": 3.006675866883275e-07, |
|
"logits": -1.486984133720398, |
|
"logps": -120.58149719238281, |
|
"loss": 0.3202, |
|
"objective": 0.3367713391780853, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.3367713391780853, |
|
"step": 705, |
|
"wo_beta": 5.604025840759277 |
|
}, |
|
{ |
|
"dpo_loss": 0.26506486535072327, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 21.811542358240928, |
|
"learning_rate": 2.931132213475884e-07, |
|
"logits": -1.652250051498413, |
|
"logps": -117.85489654541016, |
|
"loss": 0.247, |
|
"objective": 0.26506486535072327, |
|
"ranking_simple": 0.7958333492279053, |
|
"regularize": 0.26506486535072327, |
|
"step": 710, |
|
"wo_beta": 2.941725015640259 |
|
}, |
|
{ |
|
"dpo_loss": 0.21083328127861023, |
|
"epoch": 2.026452527161077, |
|
"grad_norm": 22.009413556660768, |
|
"learning_rate": 2.856154167863814e-07, |
|
"logits": -1.7095146179199219, |
|
"logps": -118.10084533691406, |
|
"loss": 0.2213, |
|
"objective": 0.21083328127861023, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.21083328127861023, |
|
"step": 715, |
|
"wo_beta": 1.996678113937378 |
|
}, |
|
{ |
|
"dpo_loss": 0.1858675181865692, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 19.31295851177097, |
|
"learning_rate": 2.7817622282960813e-07, |
|
"logits": -1.6148954629898071, |
|
"logps": -123.07051086425781, |
|
"loss": 0.2017, |
|
"objective": 0.1858675181865692, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.1858675181865692, |
|
"step": 720, |
|
"wo_beta": 2.2542519569396973 |
|
}, |
|
{ |
|
"dpo_loss": 0.2097151279449463, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 24.65512776088815, |
|
"learning_rate": 2.707976732786166e-07, |
|
"logits": -1.686496376991272, |
|
"logps": -126.76118469238281, |
|
"loss": 0.2047, |
|
"objective": 0.2097151279449463, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.2097151279449463, |
|
"step": 725, |
|
"wo_beta": 2.5733823776245117 |
|
}, |
|
{ |
|
"dpo_loss": 0.23622439801692963, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 27.027861051451126, |
|
"learning_rate": 2.6348178535517965e-07, |
|
"logits": -1.7227706909179688, |
|
"logps": -126.69216918945312, |
|
"loss": 0.2175, |
|
"objective": 0.23622439801692963, |
|
"ranking_simple": 0.7749999761581421, |
|
"regularize": 0.23622439801692963, |
|
"step": 730, |
|
"wo_beta": 3.341784715652466 |
|
}, |
|
{ |
|
"dpo_loss": 0.23194490373134613, |
|
"epoch": 2.0831365139348135, |
|
"grad_norm": 29.247659239837198, |
|
"learning_rate": 2.5623055915000686e-07, |
|
"logits": -1.6128872632980347, |
|
"logps": -126.96920776367188, |
|
"loss": 0.2248, |
|
"objective": 0.23194490373134613, |
|
"ranking_simple": 0.7875000238418579, |
|
"regularize": 0.23194490373134613, |
|
"step": 735, |
|
"wo_beta": 3.5192384719848633 |
|
}, |
|
{ |
|
"dpo_loss": 0.2231958657503128, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 25.19144911793584, |
|
"learning_rate": 2.490459770759398e-07, |
|
"logits": -1.6792032718658447, |
|
"logps": -123.05906677246094, |
|
"loss": 0.2328, |
|
"objective": 0.2231958657503128, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.2231958657503128, |
|
"step": 740, |
|
"wo_beta": 2.758615016937256 |
|
}, |
|
{ |
|
"dpo_loss": 0.20981092751026154, |
|
"epoch": 2.1114785073216815, |
|
"grad_norm": 23.2038862143533, |
|
"learning_rate": 2.419300033259798e-07, |
|
"logits": -1.6360180377960205, |
|
"logps": -124.81270599365234, |
|
"loss": 0.2182, |
|
"objective": 0.20981092751026154, |
|
"ranking_simple": 0.7791666388511658, |
|
"regularize": 0.20981092751026154, |
|
"step": 745, |
|
"wo_beta": 3.1798312664031982 |
|
}, |
|
{ |
|
"dpo_loss": 0.22788210213184357, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 22.72550221909295, |
|
"learning_rate": 2.3488458333629773e-07, |
|
"logits": -1.6554747819900513, |
|
"logps": -122.24414825439453, |
|
"loss": 0.2183, |
|
"objective": 0.22788210213184357, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.22788210213184357, |
|
"step": 750, |
|
"wo_beta": 2.6969196796417236 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.8603518009185791, |
|
"eval_logits": -1.7998822927474976, |
|
"eval_logps": -131.12570190429688, |
|
"eval_loss": 0.8466524481773376, |
|
"eval_objective": 0.8603518009185791, |
|
"eval_ranking_simple": 0.5351966619491577, |
|
"eval_regularize": 0.8603518009185791, |
|
"eval_runtime": 370.8383, |
|
"eval_samples_per_second": 15.613, |
|
"eval_steps_per_second": 1.302, |
|
"eval_wo_beta": 10.893115997314453, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.2121828943490982, |
|
"epoch": 2.13982050070855, |
|
"grad_norm": 20.710190502520984, |
|
"learning_rate": 2.2791164325437046e-07, |
|
"logits": -1.6399970054626465, |
|
"logps": -126.25770568847656, |
|
"loss": 0.2128, |
|
"objective": 0.2121828943490982, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.2121828943490982, |
|
"step": 755, |
|
"wo_beta": 3.0046989917755127 |
|
}, |
|
{ |
|
"dpo_loss": 0.24169200658798218, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 22.67213482006544, |
|
"learning_rate": 2.21013089412392e-07, |
|
"logits": -1.6009403467178345, |
|
"logps": -119.49600219726562, |
|
"loss": 0.2344, |
|
"objective": 0.24169200658798218, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.24169200658798218, |
|
"step": 760, |
|
"wo_beta": 2.758802890777588 |
|
}, |
|
{ |
|
"dpo_loss": 0.2243640273809433, |
|
"epoch": 2.168162494095418, |
|
"grad_norm": 20.64816775047569, |
|
"learning_rate": 2.1419080780610122e-07, |
|
"logits": -1.6553146839141846, |
|
"logps": -120.87618255615234, |
|
"loss": 0.2361, |
|
"objective": 0.2243640273809433, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.2243640273809433, |
|
"step": 765, |
|
"wo_beta": 2.57806396484375 |
|
}, |
|
{ |
|
"dpo_loss": 0.2182272970676422, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 24.316819536544447, |
|
"learning_rate": 2.0744666357916925e-07, |
|
"logits": -1.6933544874191284, |
|
"logps": -120.17996978759766, |
|
"loss": 0.2274, |
|
"objective": 0.2182272970676422, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.2182272970676422, |
|
"step": 770, |
|
"wo_beta": 2.544562816619873 |
|
}, |
|
{ |
|
"dpo_loss": 0.20803479850292206, |
|
"epoch": 2.196504487482286, |
|
"grad_norm": 23.36541458579555, |
|
"learning_rate": 2.0078250051328782e-07, |
|
"logits": -1.6226826906204224, |
|
"logps": -122.02224731445312, |
|
"loss": 0.2183, |
|
"objective": 0.20803479850292206, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.20803479850292206, |
|
"step": 775, |
|
"wo_beta": 2.7029988765716553 |
|
}, |
|
{ |
|
"dpo_loss": 0.22727744281291962, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 24.538113581222706, |
|
"learning_rate": 1.942001405240979e-07, |
|
"logits": -1.5558067560195923, |
|
"logps": -124.58464813232422, |
|
"loss": 0.2171, |
|
"objective": 0.22727744281291962, |
|
"ranking_simple": 0.7541666626930237, |
|
"regularize": 0.22727744281291962, |
|
"step": 780, |
|
"wo_beta": 3.0669195652008057 |
|
}, |
|
{ |
|
"dpo_loss": 0.21583274006843567, |
|
"epoch": 2.2248464808691546, |
|
"grad_norm": 28.544460286246334, |
|
"learning_rate": 1.877013831630961e-07, |
|
"logits": -1.5640733242034912, |
|
"logps": -123.09439849853516, |
|
"loss": 0.2152, |
|
"objective": 0.21583274006843567, |
|
"ranking_simple": 0.7958333492279053, |
|
"regularize": 0.21583274006843567, |
|
"step": 785, |
|
"wo_beta": 3.0127415657043457 |
|
}, |
|
{ |
|
"dpo_loss": 0.1936068832874298, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 23.5808144420256, |
|
"learning_rate": 1.812880051256551e-07, |
|
"logits": -1.5408331155776978, |
|
"logps": -125.1629638671875, |
|
"loss": 0.2122, |
|
"objective": 0.1936068832874298, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.1936068832874298, |
|
"step": 790, |
|
"wo_beta": 2.2015647888183594 |
|
}, |
|
{ |
|
"dpo_loss": 0.21609367430210114, |
|
"epoch": 2.2531884742560226, |
|
"grad_norm": 23.539955858336363, |
|
"learning_rate": 1.7496175976529337e-07, |
|
"logits": -1.6351306438446045, |
|
"logps": -125.9599609375, |
|
"loss": 0.2139, |
|
"objective": 0.21609367430210114, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.21609367430210114, |
|
"step": 795, |
|
"wo_beta": 2.052849292755127 |
|
}, |
|
{ |
|
"dpo_loss": 0.2304428219795227, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 30.254441621642492, |
|
"learning_rate": 1.6872437661432516e-07, |
|
"logits": -1.6573865413665771, |
|
"logps": -127.17088317871094, |
|
"loss": 0.2338, |
|
"objective": 0.2304428219795227, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.2304428219795227, |
|
"step": 800, |
|
"wo_beta": 2.813727617263794 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.864130973815918, |
|
"eval_logits": -1.8069664239883423, |
|
"eval_logps": -132.1160125732422, |
|
"eval_loss": 0.8479817509651184, |
|
"eval_objective": 0.864130973815918, |
|
"eval_ranking_simple": 0.5351966619491577, |
|
"eval_regularize": 0.864130973815918, |
|
"eval_runtime": 369.7595, |
|
"eval_samples_per_second": 15.659, |
|
"eval_steps_per_second": 1.306, |
|
"eval_wo_beta": 10.98104476928711, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.24076558649539948, |
|
"epoch": 2.2815304676428907, |
|
"grad_norm": 22.143907441363098, |
|
"learning_rate": 1.62577560911024e-07, |
|
"logits": -1.645892858505249, |
|
"logps": -124.22594451904297, |
|
"loss": 0.2325, |
|
"objective": 0.24076558649539948, |
|
"ranking_simple": 0.762499988079071, |
|
"regularize": 0.24076558649539948, |
|
"step": 805, |
|
"wo_beta": 3.4595048427581787 |
|
}, |
|
{ |
|
"dpo_loss": 0.2532159686088562, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 22.775287720493225, |
|
"learning_rate": 1.565229931334277e-07, |
|
"logits": -1.7806832790374756, |
|
"logps": -121.71713256835938, |
|
"loss": 0.229, |
|
"objective": 0.2532159686088562, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.2532159686088562, |
|
"step": 810, |
|
"wo_beta": 2.6662535667419434 |
|
}, |
|
{ |
|
"dpo_loss": 0.21587124466896057, |
|
"epoch": 2.309872461029759, |
|
"grad_norm": 20.40928105754396, |
|
"learning_rate": 1.5056232853991208e-07, |
|
"logits": -1.6817148923873901, |
|
"logps": -123.68048858642578, |
|
"loss": 0.1989, |
|
"objective": 0.21587124466896057, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.21587124466896057, |
|
"step": 815, |
|
"wo_beta": 2.549884557723999 |
|
}, |
|
{ |
|
"dpo_loss": 0.21313165128231049, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 24.789515132235113, |
|
"learning_rate": 1.4469719671666043e-07, |
|
"logits": -1.6495434045791626, |
|
"logps": -124.32107543945312, |
|
"loss": 0.2149, |
|
"objective": 0.21313165128231049, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.21313165128231049, |
|
"step": 820, |
|
"wo_beta": 2.404456377029419 |
|
}, |
|
{ |
|
"dpo_loss": 0.21146027743816376, |
|
"epoch": 2.3382144544166272, |
|
"grad_norm": 21.660632425555523, |
|
"learning_rate": 1.389292011321498e-07, |
|
"logits": -1.692821979522705, |
|
"logps": -122.83853149414062, |
|
"loss": 0.2168, |
|
"objective": 0.21146027743816376, |
|
"ranking_simple": 0.8166666626930237, |
|
"regularize": 0.21146027743816376, |
|
"step": 825, |
|
"wo_beta": 2.4975247383117676 |
|
}, |
|
{ |
|
"dpo_loss": 0.19700536131858826, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 27.701358075237533, |
|
"learning_rate": 1.3325991869878012e-07, |
|
"logits": -1.6795495748519897, |
|
"logps": -126.65140533447266, |
|
"loss": 0.1967, |
|
"objective": 0.19700536131858826, |
|
"ranking_simple": 0.8374999761581421, |
|
"regularize": 0.19700536131858826, |
|
"step": 830, |
|
"wo_beta": 2.146075487136841 |
|
}, |
|
{ |
|
"dpo_loss": 0.21794618666172028, |
|
"epoch": 2.3665564478034957, |
|
"grad_norm": 29.238202346662906, |
|
"learning_rate": 1.2769089934176126e-07, |
|
"logits": -1.647661566734314, |
|
"logps": -125.17839813232422, |
|
"loss": 0.2288, |
|
"objective": 0.21794618666172028, |
|
"ranking_simple": 0.8416666388511658, |
|
"regularize": 0.21794618666172028, |
|
"step": 835, |
|
"wo_beta": 2.8974695205688477 |
|
}, |
|
{ |
|
"dpo_loss": 0.21900226175785065, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 24.009821339334245, |
|
"learning_rate": 1.222236655753791e-07, |
|
"logits": -1.6093765497207642, |
|
"logps": -125.18759155273438, |
|
"loss": 0.2184, |
|
"objective": 0.21900226175785065, |
|
"ranking_simple": 0.7833333611488342, |
|
"regularize": 0.21900226175785065, |
|
"step": 840, |
|
"wo_beta": 2.749152898788452 |
|
}, |
|
{ |
|
"dpo_loss": 0.24857226014137268, |
|
"epoch": 2.3948984411903638, |
|
"grad_norm": 27.5079137044431, |
|
"learning_rate": 1.1685971208675538e-07, |
|
"logits": -1.6249310970306396, |
|
"logps": -127.47116088867188, |
|
"loss": 0.2262, |
|
"objective": 0.24857226014137268, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.24857226014137268, |
|
"step": 845, |
|
"wo_beta": 2.7568535804748535 |
|
}, |
|
{ |
|
"dpo_loss": 0.2085387259721756, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 25.878018469347772, |
|
"learning_rate": 1.1160050532721527e-07, |
|
"logits": -1.67723548412323, |
|
"logps": -125.76563262939453, |
|
"loss": 0.2015, |
|
"objective": 0.2085387259721756, |
|
"ranking_simple": 0.7958333492279053, |
|
"regularize": 0.2085387259721756, |
|
"step": 850, |
|
"wo_beta": 2.600330352783203 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.8720477223396301, |
|
"eval_logits": -1.80176842212677, |
|
"eval_logps": -133.381103515625, |
|
"eval_loss": 0.8571510910987854, |
|
"eval_objective": 0.8720477223396301, |
|
"eval_ranking_simple": 0.5377846956253052, |
|
"eval_regularize": 0.8720477223396301, |
|
"eval_runtime": 369.6782, |
|
"eval_samples_per_second": 15.662, |
|
"eval_steps_per_second": 1.307, |
|
"eval_wo_beta": 11.025221824645996, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.2096593677997589, |
|
"epoch": 2.423240434577232, |
|
"grad_norm": 25.499456648461173, |
|
"learning_rate": 1.0644748311137375e-07, |
|
"logits": -1.6905667781829834, |
|
"logps": -123.53242492675781, |
|
"loss": 0.2195, |
|
"objective": 0.2096593677997589, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.2096593677997589, |
|
"step": 855, |
|
"wo_beta": 1.9907649755477905 |
|
}, |
|
{ |
|
"dpo_loss": 0.21979431807994843, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 24.848581285581997, |
|
"learning_rate": 1.0140205422405212e-07, |
|
"logits": -1.6141736507415771, |
|
"logps": -127.53914642333984, |
|
"loss": 0.2157, |
|
"objective": 0.21979431807994843, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.21979431807994843, |
|
"step": 860, |
|
"wo_beta": 3.0604774951934814 |
|
}, |
|
{ |
|
"dpo_loss": 0.2267284095287323, |
|
"epoch": 2.4515824279641003, |
|
"grad_norm": 26.5384738263413, |
|
"learning_rate": 9.646559803512993e-08, |
|
"logits": -1.6312936544418335, |
|
"logps": -125.63304138183594, |
|
"loss": 0.2262, |
|
"objective": 0.2267284095287323, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.2267284095287323, |
|
"step": 865, |
|
"wo_beta": 3.2560784816741943 |
|
}, |
|
{ |
|
"dpo_loss": 0.196714848279953, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 22.187392253095393, |
|
"learning_rate": 9.163946412243895e-08, |
|
"logits": -1.6977574825286865, |
|
"logps": -125.96756744384766, |
|
"loss": 0.2037, |
|
"objective": 0.196714848279953, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.196714848279953, |
|
"step": 870, |
|
"wo_beta": 2.583970308303833 |
|
}, |
|
{ |
|
"dpo_loss": 0.1924598515033722, |
|
"epoch": 2.4799244213509684, |
|
"grad_norm": 25.12579870814084, |
|
"learning_rate": 8.692497190280224e-08, |
|
"logits": -1.5863568782806396, |
|
"logps": -126.04496765136719, |
|
"loss": 0.2038, |
|
"objective": 0.1924598515033722, |
|
"ranking_simple": 0.8583333492279053, |
|
"regularize": 0.1924598515033722, |
|
"step": 875, |
|
"wo_beta": 2.296090602874756 |
|
}, |
|
{ |
|
"dpo_loss": 0.2359813004732132, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 23.176269104907163, |
|
"learning_rate": 8.232341027131883e-08, |
|
"logits": -1.5722264051437378, |
|
"logps": -125.54722595214844, |
|
"loss": 0.2224, |
|
"objective": 0.2359813004732132, |
|
"ranking_simple": 0.7666666507720947, |
|
"regularize": 0.2359813004732132, |
|
"step": 880, |
|
"wo_beta": 3.9758193492889404 |
|
}, |
|
{ |
|
"dpo_loss": 0.2542850375175476, |
|
"epoch": 2.5082664147378364, |
|
"grad_norm": 28.503952001031568, |
|
"learning_rate": 7.783603724899257e-08, |
|
"logits": -1.724973201751709, |
|
"logps": -123.99037170410156, |
|
"loss": 0.2326, |
|
"objective": 0.2542850375175476, |
|
"ranking_simple": 0.7958333492279053, |
|
"regularize": 0.2542850375175476, |
|
"step": 885, |
|
"wo_beta": 3.509568929672241 |
|
}, |
|
{ |
|
"dpo_loss": 0.20098893344402313, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 24.488143250342084, |
|
"learning_rate": 7.346407963880136e-08, |
|
"logits": -1.595066785812378, |
|
"logps": -123.90083312988281, |
|
"loss": 0.2056, |
|
"objective": 0.20098893344402313, |
|
"ranking_simple": 0.8291666507720947, |
|
"regularize": 0.20098893344402313, |
|
"step": 890, |
|
"wo_beta": 2.0867483615875244 |
|
}, |
|
{ |
|
"dpo_loss": 0.22675950825214386, |
|
"epoch": 2.536608408124705, |
|
"grad_norm": 26.1383864365769, |
|
"learning_rate": 6.92087326903022e-08, |
|
"logits": -1.6463298797607422, |
|
"logps": -126.22193908691406, |
|
"loss": 0.2235, |
|
"objective": 0.22675950825214386, |
|
"ranking_simple": 0.7833333611488342, |
|
"regularize": 0.22675950825214386, |
|
"step": 895, |
|
"wo_beta": 3.0455052852630615 |
|
}, |
|
{ |
|
"dpo_loss": 0.26797404885292053, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 29.177118299023412, |
|
"learning_rate": 6.507115977286143e-08, |
|
"logits": -1.5577763319015503, |
|
"logps": -123.61392974853516, |
|
"loss": 0.2348, |
|
"objective": 0.26797404885292053, |
|
"ranking_simple": 0.7749999761581421, |
|
"regularize": 0.26797404885292053, |
|
"step": 900, |
|
"wo_beta": 3.1443541049957275 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.8675441145896912, |
|
"eval_logits": -1.8113691806793213, |
|
"eval_logps": -133.67962646484375, |
|
"eval_loss": 0.8529655933380127, |
|
"eval_objective": 0.8675441145896912, |
|
"eval_ranking_simple": 0.5377846956253052, |
|
"eval_regularize": 0.8675441145896912, |
|
"eval_runtime": 369.0633, |
|
"eval_samples_per_second": 15.688, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 10.942305564880371, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.20284312963485718, |
|
"epoch": 2.564950401511573, |
|
"grad_norm": 19.950697729864736, |
|
"learning_rate": 6.105249205760127e-08, |
|
"logits": -1.6392539739608765, |
|
"logps": -123.07585906982422, |
|
"loss": 0.2053, |
|
"objective": 0.20284312963485718, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.20284312963485718, |
|
"step": 905, |
|
"wo_beta": 2.37481951713562 |
|
}, |
|
{ |
|
"dpo_loss": 0.23266027867794037, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 30.055735152469072, |
|
"learning_rate": 5.7153828208148846e-08, |
|
"logits": -1.6491973400115967, |
|
"logps": -127.52837371826172, |
|
"loss": 0.217, |
|
"objective": 0.23266027867794037, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.23266027867794037, |
|
"step": 910, |
|
"wo_beta": 3.36558198928833 |
|
}, |
|
{ |
|
"dpo_loss": 0.23302534222602844, |
|
"epoch": 2.593292394898441, |
|
"grad_norm": 22.890822851586893, |
|
"learning_rate": 5.337623408027292e-08, |
|
"logits": -1.7552512884140015, |
|
"logps": -127.87796783447266, |
|
"loss": 0.2121, |
|
"objective": 0.23302534222602844, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.23302534222602844, |
|
"step": 915, |
|
"wo_beta": 2.502667188644409 |
|
}, |
|
{ |
|
"dpo_loss": 0.20162495970726013, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 24.20273693189473, |
|
"learning_rate": 4.972074243048896e-08, |
|
"logits": -1.5814868211746216, |
|
"logps": -127.01163482666016, |
|
"loss": 0.2068, |
|
"objective": 0.20162495970726013, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.20162495970726013, |
|
"step": 920, |
|
"wo_beta": 1.8146071434020996 |
|
}, |
|
{ |
|
"dpo_loss": 0.21291884779930115, |
|
"epoch": 2.6216343882853095, |
|
"grad_norm": 22.388349767286606, |
|
"learning_rate": 4.6188352633713956e-08, |
|
"logits": -1.5830769538879395, |
|
"logps": -126.23300170898438, |
|
"loss": 0.2195, |
|
"objective": 0.21291884779930115, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.21291884779930115, |
|
"step": 925, |
|
"wo_beta": 3.1612021923065186 |
|
}, |
|
{ |
|
"dpo_loss": 0.24175626039505005, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 21.949168917598172, |
|
"learning_rate": 4.2780030410047796e-08, |
|
"logits": -1.6357704401016235, |
|
"logps": -125.97001647949219, |
|
"loss": 0.2179, |
|
"objective": 0.24175626039505005, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.24175626039505005, |
|
"step": 930, |
|
"wo_beta": 3.436295747756958 |
|
}, |
|
{ |
|
"dpo_loss": 0.22544310986995697, |
|
"epoch": 2.6499763816721775, |
|
"grad_norm": 28.54777226492267, |
|
"learning_rate": 3.949670756075446e-08, |
|
"logits": -1.6567264795303345, |
|
"logps": -123.90028381347656, |
|
"loss": 0.2428, |
|
"objective": 0.22544310986995697, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.22544310986995697, |
|
"step": 935, |
|
"wo_beta": 2.002594470977783 |
|
}, |
|
{ |
|
"dpo_loss": 0.19537684321403503, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 27.292491316004774, |
|
"learning_rate": 3.63392817135173e-08, |
|
"logits": -1.6852660179138184, |
|
"logps": -128.0804901123047, |
|
"loss": 0.2163, |
|
"objective": 0.19537684321403503, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.19537684321403503, |
|
"step": 940, |
|
"wo_beta": 3.2341361045837402 |
|
}, |
|
{ |
|
"dpo_loss": 0.21889939904212952, |
|
"epoch": 2.678318375059046, |
|
"grad_norm": 23.60505865361295, |
|
"learning_rate": 3.330861607703611e-08, |
|
"logits": -1.7046397924423218, |
|
"logps": -125.53500366210938, |
|
"loss": 0.206, |
|
"objective": 0.21889939904212952, |
|
"ranking_simple": 0.7875000238418579, |
|
"regularize": 0.21889939904212952, |
|
"step": 945, |
|
"wo_beta": 3.0100159645080566 |
|
}, |
|
{ |
|
"dpo_loss": 0.20888622105121613, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 24.640589732429273, |
|
"learning_rate": 3.040553920503502e-08, |
|
"logits": -1.5867191553115845, |
|
"logps": -123.99597930908203, |
|
"loss": 0.2268, |
|
"objective": 0.20888622105121613, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.20888622105121613, |
|
"step": 950, |
|
"wo_beta": 2.38991379737854 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.8684250116348267, |
|
"eval_logits": -1.8135783672332764, |
|
"eval_logps": -133.2828826904297, |
|
"eval_loss": 0.8525474667549133, |
|
"eval_objective": 0.8684250116348267, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.8684250116348267, |
|
"eval_runtime": 373.8008, |
|
"eval_samples_per_second": 15.49, |
|
"eval_steps_per_second": 1.292, |
|
"eval_wo_beta": 10.9784517288208, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.21907640993595123, |
|
"epoch": 2.706660368445914, |
|
"grad_norm": 25.73877923734523, |
|
"learning_rate": 2.7630844769743756e-08, |
|
"logits": -1.696962833404541, |
|
"logps": -125.86216735839844, |
|
"loss": 0.217, |
|
"objective": 0.21907640993595123, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.21907640993595123, |
|
"step": 955, |
|
"wo_beta": 2.471001148223877 |
|
}, |
|
{ |
|
"dpo_loss": 0.24159465730190277, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 27.398682783569342, |
|
"learning_rate": 2.4985291344915673e-08, |
|
"logits": -1.6559653282165527, |
|
"logps": -125.80078125, |
|
"loss": 0.2243, |
|
"objective": 0.24159465730190277, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.24159465730190277, |
|
"step": 960, |
|
"wo_beta": 3.4911081790924072 |
|
}, |
|
{ |
|
"dpo_loss": 0.17974473536014557, |
|
"epoch": 2.735002361832782, |
|
"grad_norm": 26.129978647933363, |
|
"learning_rate": 2.2469602198441573e-08, |
|
"logits": -1.6600605249404907, |
|
"logps": -125.82157897949219, |
|
"loss": 0.2216, |
|
"objective": 0.17974473536014557, |
|
"ranking_simple": 0.8458333611488342, |
|
"regularize": 0.17974473536014557, |
|
"step": 965, |
|
"wo_beta": 1.8301441669464111 |
|
}, |
|
{ |
|
"dpo_loss": 0.2389921396970749, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 24.93627247502558, |
|
"learning_rate": 2.008446509461498e-08, |
|
"logits": -1.6814639568328857, |
|
"logps": -127.4894027709961, |
|
"loss": 0.2119, |
|
"objective": 0.2389921396970749, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.2389921396970749, |
|
"step": 970, |
|
"wo_beta": 2.071624517440796 |
|
}, |
|
{ |
|
"dpo_loss": 0.21195697784423828, |
|
"epoch": 2.7633443552196506, |
|
"grad_norm": 24.386661793540505, |
|
"learning_rate": 1.7830532106104746e-08, |
|
"logits": -1.6035431623458862, |
|
"logps": -124.46774291992188, |
|
"loss": 0.2154, |
|
"objective": 0.21195697784423828, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.21195697784423828, |
|
"step": 975, |
|
"wo_beta": 3.027677059173584 |
|
}, |
|
{ |
|
"dpo_loss": 0.2177601158618927, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 30.722672013551914, |
|
"learning_rate": 1.570841943568446e-08, |
|
"logits": -1.7569483518600464, |
|
"logps": -126.10541534423828, |
|
"loss": 0.2135, |
|
"objective": 0.2177601158618927, |
|
"ranking_simple": 0.824999988079071, |
|
"regularize": 0.2177601158618927, |
|
"step": 980, |
|
"wo_beta": 1.981053113937378 |
|
}, |
|
{ |
|
"dpo_loss": 0.21660226583480835, |
|
"epoch": 2.7916863486065187, |
|
"grad_norm": 24.268020278235447, |
|
"learning_rate": 1.3718707247769134e-08, |
|
"logits": -1.5750867128372192, |
|
"logps": -122.41696166992188, |
|
"loss": 0.2142, |
|
"objective": 0.21660226583480835, |
|
"ranking_simple": 0.8208333253860474, |
|
"regularize": 0.21660226583480835, |
|
"step": 985, |
|
"wo_beta": 2.762291431427002 |
|
}, |
|
{ |
|
"dpo_loss": 0.23230423033237457, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 25.3957119119687, |
|
"learning_rate": 1.1861939509803686e-08, |
|
"logits": -1.626227617263794, |
|
"logps": -125.03184509277344, |
|
"loss": 0.2309, |
|
"objective": 0.23230423033237457, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.23230423033237457, |
|
"step": 990, |
|
"wo_beta": 3.499976396560669 |
|
}, |
|
{ |
|
"dpo_loss": 0.22918492555618286, |
|
"epoch": 2.820028341993387, |
|
"grad_norm": 26.589296858875272, |
|
"learning_rate": 1.0138623843548078e-08, |
|
"logits": -1.691062092781067, |
|
"logps": -126.53192901611328, |
|
"loss": 0.2269, |
|
"objective": 0.22918492555618286, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.22918492555618286, |
|
"step": 995, |
|
"wo_beta": 3.2248499393463135 |
|
}, |
|
{ |
|
"dpo_loss": 0.22611786425113678, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 23.891964045353923, |
|
"learning_rate": 8.54923138629815e-09, |
|
"logits": -1.6503469944000244, |
|
"logps": -122.57675170898438, |
|
"loss": 0.2198, |
|
"objective": 0.22611786425113678, |
|
"ranking_simple": 0.8333333134651184, |
|
"regularize": 0.22611786425113678, |
|
"step": 1000, |
|
"wo_beta": 2.427152395248413 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.8652148246765137, |
|
"eval_logits": -1.8167296648025513, |
|
"eval_logps": -132.88087463378906, |
|
"eval_loss": 0.8492961525917053, |
|
"eval_objective": 0.8652148246765137, |
|
"eval_ranking_simple": 0.5341615080833435, |
|
"eval_regularize": 0.8652148246765137, |
|
"eval_runtime": 368.2335, |
|
"eval_samples_per_second": 15.724, |
|
"eval_steps_per_second": 1.312, |
|
"eval_wo_beta": 10.938254356384277, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.24608786404132843, |
|
"epoch": 2.848370335380255, |
|
"grad_norm": 27.602968631540815, |
|
"learning_rate": 7.09419666208183e-09, |
|
"logits": -1.6333565711975098, |
|
"logps": -123.5007553100586, |
|
"loss": 0.2273, |
|
"objective": 0.24608786404132843, |
|
"ranking_simple": 0.7916666865348816, |
|
"regularize": 0.24608786404132843, |
|
"step": 1005, |
|
"wo_beta": 3.1694223880767822 |
|
}, |
|
{ |
|
"dpo_loss": 0.20009997487068176, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 25.090299269603783, |
|
"learning_rate": 5.773917462864264e-09, |
|
"logits": -1.6704093217849731, |
|
"logps": -125.71635437011719, |
|
"loss": 0.2037, |
|
"objective": 0.20009997487068176, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.20009997487068176, |
|
"step": 1010, |
|
"wo_beta": 2.577218532562256 |
|
}, |
|
{ |
|
"dpo_loss": 0.21036744117736816, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 28.329606007932064, |
|
"learning_rate": 4.588754739795586e-09, |
|
"logits": -1.6348304748535156, |
|
"logps": -122.55280303955078, |
|
"loss": 0.2113, |
|
"objective": 0.21036744117736816, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.21036744117736816, |
|
"step": 1015, |
|
"wo_beta": 2.199587345123291 |
|
}, |
|
{ |
|
"dpo_loss": 0.21461248397827148, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 24.62375043047653, |
|
"learning_rate": 3.53903250453047e-09, |
|
"logits": -1.5652154684066772, |
|
"logps": -125.25798797607422, |
|
"loss": 0.2252, |
|
"objective": 0.21461248397827148, |
|
"ranking_simple": 0.7708333134651184, |
|
"regularize": 0.21461248397827148, |
|
"step": 1020, |
|
"wo_beta": 3.251589059829712 |
|
}, |
|
{ |
|
"dpo_loss": 0.23392058908939362, |
|
"epoch": 2.9050543221539913, |
|
"grad_norm": 25.1548357787363, |
|
"learning_rate": 2.6250377406467627e-09, |
|
"logits": -1.6649322509765625, |
|
"logps": -124.19398498535156, |
|
"loss": 0.2291, |
|
"objective": 0.23392058908939362, |
|
"ranking_simple": 0.8083333373069763, |
|
"regularize": 0.23392058908939362, |
|
"step": 1025, |
|
"wo_beta": 2.934882879257202 |
|
}, |
|
{ |
|
"dpo_loss": 0.19529716670513153, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 23.978894569817292, |
|
"learning_rate": 1.8470203251865768e-09, |
|
"logits": -1.7404934167861938, |
|
"logps": -123.60317993164062, |
|
"loss": 0.2177, |
|
"objective": 0.19529716670513153, |
|
"ranking_simple": 0.7666666507720947, |
|
"regularize": 0.19529716670513153, |
|
"step": 1030, |
|
"wo_beta": 2.584693670272827 |
|
}, |
|
{ |
|
"dpo_loss": 0.19891007244586945, |
|
"epoch": 2.9333963155408598, |
|
"grad_norm": 22.489637587674896, |
|
"learning_rate": 1.2051929603428823e-09, |
|
"logits": -1.6503539085388184, |
|
"logps": -126.50735473632812, |
|
"loss": 0.2124, |
|
"objective": 0.19891007244586945, |
|
"ranking_simple": 0.8125, |
|
"regularize": 0.19891007244586945, |
|
"step": 1035, |
|
"wo_beta": 2.176048755645752 |
|
}, |
|
{ |
|
"dpo_loss": 0.21993538737297058, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 23.591827890954175, |
|
"learning_rate": 6.997311153086882e-10, |
|
"logits": -1.6743519306182861, |
|
"logps": -125.27057647705078, |
|
"loss": 0.2151, |
|
"objective": 0.21993538737297058, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.21993538737297058, |
|
"step": 1040, |
|
"wo_beta": 3.0266594886779785 |
|
}, |
|
{ |
|
"dpo_loss": 0.22178266942501068, |
|
"epoch": 2.961738308927728, |
|
"grad_norm": 26.41640596804321, |
|
"learning_rate": 3.3077297830541585e-10, |
|
"logits": -1.6230467557907104, |
|
"logps": -128.82781982421875, |
|
"loss": 0.2256, |
|
"objective": 0.22178266942501068, |
|
"ranking_simple": 0.8041666746139526, |
|
"regularize": 0.22178266942501068, |
|
"step": 1045, |
|
"wo_beta": 3.258082866668701 |
|
}, |
|
{ |
|
"dpo_loss": 0.21878859400749207, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 25.001515767941786, |
|
"learning_rate": 9.841941880361914e-11, |
|
"logits": -1.6594524383544922, |
|
"logps": -121.61707305908203, |
|
"loss": 0.2221, |
|
"objective": 0.21878859400749207, |
|
"ranking_simple": 0.7958333492279053, |
|
"regularize": 0.21878859400749207, |
|
"step": 1050, |
|
"wo_beta": 2.6373982429504395 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.8653033375740051, |
|
"eval_logits": -1.816504955291748, |
|
"eval_logps": -132.85667419433594, |
|
"eval_loss": 0.8493290543556213, |
|
"eval_objective": 0.8653033375740051, |
|
"eval_ranking_simple": 0.534679114818573, |
|
"eval_regularize": 0.8653033375740051, |
|
"eval_runtime": 369.7292, |
|
"eval_samples_per_second": 15.66, |
|
"eval_steps_per_second": 1.306, |
|
"eval_wo_beta": 10.94184398651123, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.22742925584316254, |
|
"epoch": 2.9900803023145963, |
|
"grad_norm": 25.264967316936268, |
|
"learning_rate": 2.7339599464326622e-12, |
|
"logits": -1.6433926820755005, |
|
"logps": -123.5830078125, |
|
"loss": 0.2221, |
|
"objective": 0.22742925584316254, |
|
"ranking_simple": 0.800000011920929, |
|
"regularize": 0.22742925584316254, |
|
"step": 1055, |
|
"wo_beta": 2.7176921367645264 |
|
}, |
|
{ |
|
"epoch": 2.992914501653283, |
|
"step": 1056, |
|
"total_flos": 0.0, |
|
"train_loss": 0.37872021553586377, |
|
"train_runtime": 34657.2213, |
|
"train_samples_per_second": 4.398, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|