hZzy's picture
Model save
21b76a2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 13.413590921123136,
"learning_rate": 1.1363636363636363e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6931466460227966,
"epoch": 0.02834199338686821,
"grad_norm": 13.328732837688287,
"learning_rate": 5.6818181818181815e-08,
"logits": -1.3678570985794067,
"logps": -84.42396545410156,
"loss": 0.413,
"objective": 0.37554118037223816,
"ranking_idealized": 0.6510416865348816,
"ranking_idealized_expo": 0.5572916865348816,
"ranking_simple": 0.546875,
"regularize": 0.37554118037223816,
"step": 5
},
{
"dpo_loss": 0.6923526525497437,
"epoch": 0.05668398677373642,
"grad_norm": 12.646793869664034,
"learning_rate": 1.1363636363636363e-07,
"logits": -1.4465404748916626,
"logps": -83.2779541015625,
"loss": 0.4172,
"objective": 0.4415889084339142,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 0.4415889084339142,
"step": 10
},
{
"dpo_loss": 0.6924737095832825,
"epoch": 0.08502598016060463,
"grad_norm": 12.521876797779205,
"learning_rate": 1.7045454545454543e-07,
"logits": -1.4127671718597412,
"logps": -83.22444152832031,
"loss": 0.4245,
"objective": 0.41116636991500854,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.41116636991500854,
"step": 15
},
{
"dpo_loss": 0.6923013925552368,
"epoch": 0.11336797354747284,
"grad_norm": 13.477735980439727,
"learning_rate": 2.2727272727272726e-07,
"logits": -1.4050496816635132,
"logps": -83.70260620117188,
"loss": 0.4131,
"objective": 0.40299364924430847,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.40299364924430847,
"step": 20
},
{
"dpo_loss": 0.6886129975318909,
"epoch": 0.14170996693434104,
"grad_norm": 13.118957044182272,
"learning_rate": 2.840909090909091e-07,
"logits": -1.4580955505371094,
"logps": -83.24189758300781,
"loss": 0.4088,
"objective": 0.4266737103462219,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.4266737103462219,
"step": 25
},
{
"dpo_loss": 0.6878851652145386,
"epoch": 0.17005196032120926,
"grad_norm": 13.707804900141507,
"learning_rate": 3.4090909090909085e-07,
"logits": -1.4379478693008423,
"logps": -83.61381530761719,
"loss": 0.4079,
"objective": 0.41071560978889465,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5333333611488342,
"regularize": 0.41071560978889465,
"step": 30
},
{
"dpo_loss": 0.683499276638031,
"epoch": 0.19839395370807747,
"grad_norm": 13.417059102073019,
"learning_rate": 3.977272727272727e-07,
"logits": -1.4155864715576172,
"logps": -82.38362121582031,
"loss": 0.4024,
"objective": 0.38590285181999207,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.38590285181999207,
"step": 35
},
{
"dpo_loss": 0.6810929775238037,
"epoch": 0.22673594709494568,
"grad_norm": 14.466457742258758,
"learning_rate": 4.545454545454545e-07,
"logits": -1.4129302501678467,
"logps": -83.20220184326172,
"loss": 0.4137,
"objective": 0.4313904047012329,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.512499988079071,
"regularize": 0.4313904047012329,
"step": 40
},
{
"dpo_loss": 0.676669180393219,
"epoch": 0.25507794048181387,
"grad_norm": 13.031938578727805,
"learning_rate": 5.113636363636363e-07,
"logits": -1.5500620603561401,
"logps": -84.26518249511719,
"loss": 0.4102,
"objective": 0.4602474272251129,
"ranking_idealized": 0.7208333611488342,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.4602474272251129,
"step": 45
},
{
"dpo_loss": 0.6746354699134827,
"epoch": 0.2834199338686821,
"grad_norm": 12.780839075585325,
"learning_rate": 5.681818181818182e-07,
"logits": -1.4677213430404663,
"logps": -81.97610473632812,
"loss": 0.4009,
"objective": 0.38121363520622253,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.46666666865348816,
"regularize": 0.38121363520622253,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6905611157417297,
"eval_logits": -1.5066354274749756,
"eval_logps": -90.34810638427734,
"eval_loss": 0.4076729416847229,
"eval_objective": 0.4091368019580841,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 0.4091368019580841,
"eval_runtime": 260.5812,
"eval_samples_per_second": 22.22,
"eval_steps_per_second": 0.929,
"step": 50
},
{
"dpo_loss": 0.6733829975128174,
"epoch": 0.3117619272555503,
"grad_norm": 14.366169874458807,
"learning_rate": 6.249999999999999e-07,
"logits": -1.5239266157150269,
"logps": -83.96293640136719,
"loss": 0.3923,
"objective": 0.3916258215904236,
"ranking_idealized": 0.7124999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3916258215904236,
"step": 55
},
{
"dpo_loss": 0.6722519993782043,
"epoch": 0.3401039206424185,
"grad_norm": 13.825071963348561,
"learning_rate": 6.818181818181817e-07,
"logits": -1.5167808532714844,
"logps": -84.09709167480469,
"loss": 0.3826,
"objective": 0.36959540843963623,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5458333492279053,
"regularize": 0.36959540843963623,
"step": 60
},
{
"dpo_loss": 0.6677471995353699,
"epoch": 0.3684459140292867,
"grad_norm": 14.718501577233726,
"learning_rate": 7.386363636363636e-07,
"logits": -1.4766509532928467,
"logps": -85.11907196044922,
"loss": 0.3687,
"objective": 0.3556104004383087,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5041666626930237,
"regularize": 0.3556104004383087,
"step": 65
},
{
"dpo_loss": 0.6649280786514282,
"epoch": 0.39678790741615494,
"grad_norm": 14.263348001605417,
"learning_rate": 7.954545454545454e-07,
"logits": -1.564154028892517,
"logps": -86.45026397705078,
"loss": 0.3702,
"objective": 0.4041551947593689,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5249999761581421,
"regularize": 0.4041551947593689,
"step": 70
},
{
"dpo_loss": 0.664714515209198,
"epoch": 0.42512990080302315,
"grad_norm": 14.983540223476671,
"learning_rate": 8.522727272727273e-07,
"logits": -1.589759349822998,
"logps": -86.56982421875,
"loss": 0.375,
"objective": 0.394586443901062,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5958333611488342,
"regularize": 0.394586443901062,
"step": 75
},
{
"dpo_loss": 0.6637415289878845,
"epoch": 0.45347189418989137,
"grad_norm": 15.092974710602856,
"learning_rate": 9.09090909090909e-07,
"logits": -1.6327601671218872,
"logps": -86.078857421875,
"loss": 0.3592,
"objective": 0.36432406306266785,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.550000011920929,
"regularize": 0.36432406306266785,
"step": 80
},
{
"dpo_loss": 0.6607963442802429,
"epoch": 0.4818138875767596,
"grad_norm": 13.665881861145971,
"learning_rate": 9.65909090909091e-07,
"logits": -1.6192591190338135,
"logps": -84.15817260742188,
"loss": 0.3518,
"objective": 0.3733659088611603,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5375000238418579,
"regularize": 0.3733659088611603,
"step": 85
},
{
"dpo_loss": 0.6554233431816101,
"epoch": 0.5101558809636277,
"grad_norm": 14.760661346851347,
"learning_rate": 9.999842657116664e-07,
"logits": -1.4328864812850952,
"logps": -87.27013397216797,
"loss": 0.3509,
"objective": 0.3566504418849945,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5,
"regularize": 0.3566504418849945,
"step": 90
},
{
"dpo_loss": 0.6343147158622742,
"epoch": 0.538497874350496,
"grad_norm": 14.360399578992759,
"learning_rate": 9.998072663403656e-07,
"logits": -1.518968105316162,
"logps": -87.2616958618164,
"loss": 0.3323,
"objective": 0.3436143696308136,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5958333611488342,
"regularize": 0.3436143696308136,
"step": 95
},
{
"dpo_loss": 0.6289714574813843,
"epoch": 0.5668398677373642,
"grad_norm": 14.235586512699614,
"learning_rate": 9.99433669591504e-07,
"logits": -1.6142817735671997,
"logps": -86.73899841308594,
"loss": 0.3456,
"objective": 0.3731040954589844,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5541666746139526,
"regularize": 0.3731040954589844,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.686673104763031,
"eval_logits": -1.6104196310043335,
"eval_logps": -92.6246109008789,
"eval_loss": 0.4036862850189209,
"eval_objective": 0.40806055068969727,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.40806055068969727,
"eval_runtime": 260.2239,
"eval_samples_per_second": 22.25,
"eval_steps_per_second": 0.93,
"step": 100
},
{
"dpo_loss": 0.6265436410903931,
"epoch": 0.5951818611242324,
"grad_norm": 17.534314390535563,
"learning_rate": 9.988636224180095e-07,
"logits": -1.4830114841461182,
"logps": -87.12000274658203,
"loss": 0.336,
"objective": 0.33996376395225525,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5791666507720947,
"regularize": 0.33996376395225525,
"step": 105
},
{
"dpo_loss": 0.6268933415412903,
"epoch": 0.6235238545111006,
"grad_norm": 17.309478536368452,
"learning_rate": 9.980973490458728e-07,
"logits": -1.6955339908599854,
"logps": -86.52151489257812,
"loss": 0.3327,
"objective": 0.3487900495529175,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.5291666388511658,
"regularize": 0.3487900495529175,
"step": 110
},
{
"dpo_loss": 0.6324852108955383,
"epoch": 0.6518658478979689,
"grad_norm": 16.226500081559077,
"learning_rate": 9.971351508859486e-07,
"logits": -1.6517375707626343,
"logps": -85.2242660522461,
"loss": 0.3154,
"objective": 0.312590092420578,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.512499988079071,
"regularize": 0.312590092420578,
"step": 115
},
{
"dpo_loss": 0.6312325596809387,
"epoch": 0.680207841284837,
"grad_norm": 15.958846654193053,
"learning_rate": 9.959774064153975e-07,
"logits": -1.5981483459472656,
"logps": -87.78892517089844,
"loss": 0.3121,
"objective": 0.2821680009365082,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5666666626930237,
"regularize": 0.2821680009365082,
"step": 120
},
{
"dpo_loss": 0.6170183420181274,
"epoch": 0.7085498346717053,
"grad_norm": 14.799813931917887,
"learning_rate": 9.94624571028813e-07,
"logits": -1.5694143772125244,
"logps": -88.2162094116211,
"loss": 0.3079,
"objective": 0.31248998641967773,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6083333492279053,
"regularize": 0.31248998641967773,
"step": 125
},
{
"dpo_loss": 0.630465567111969,
"epoch": 0.7368918280585735,
"grad_norm": 14.141148872850344,
"learning_rate": 9.930771768590933e-07,
"logits": -1.7086971998214722,
"logps": -85.12911987304688,
"loss": 0.3053,
"objective": 0.30258244276046753,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.5166666507720947,
"regularize": 0.30258244276046753,
"step": 130
},
{
"dpo_loss": 0.6269639134407043,
"epoch": 0.7652338214454416,
"grad_norm": 14.651813715107076,
"learning_rate": 9.91335832568129e-07,
"logits": -1.661932110786438,
"logps": -86.05510711669922,
"loss": 0.2876,
"objective": 0.298209547996521,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5666666626930237,
"regularize": 0.2982095181941986,
"step": 135
},
{
"dpo_loss": 0.6221628785133362,
"epoch": 0.7935758148323099,
"grad_norm": 15.239526231712386,
"learning_rate": 9.894012231073895e-07,
"logits": -1.550681710243225,
"logps": -86.62859344482422,
"loss": 0.2845,
"objective": 0.30004462599754333,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5458333492279053,
"regularize": 0.30004462599754333,
"step": 140
},
{
"dpo_loss": 0.608003556728363,
"epoch": 0.821917808219178,
"grad_norm": 14.863021825503232,
"learning_rate": 9.872741094484964e-07,
"logits": -1.6267261505126953,
"logps": -87.43079376220703,
"loss": 0.2862,
"objective": 0.29867058992385864,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5375000238418579,
"regularize": 0.29867058992385864,
"step": 145
},
{
"dpo_loss": 0.6166930794715881,
"epoch": 0.8502598016060463,
"grad_norm": 14.76588996791471,
"learning_rate": 9.849553282839024e-07,
"logits": -1.587580919265747,
"logps": -84.92633819580078,
"loss": 0.2786,
"objective": 0.27978453040122986,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5916666388511658,
"regularize": 0.27978453040122986,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6873137950897217,
"eval_logits": -1.6473405361175537,
"eval_logps": -94.62364196777344,
"eval_loss": 0.40614137053489685,
"eval_objective": 0.41305893659591675,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.41305893659591675,
"eval_runtime": 260.0727,
"eval_samples_per_second": 22.263,
"eval_steps_per_second": 0.931,
"step": 150
},
{
"dpo_loss": 0.605694591999054,
"epoch": 0.8786017949929145,
"grad_norm": 14.394381519568077,
"learning_rate": 9.824457916977784e-07,
"logits": -1.621727466583252,
"logps": -86.20561218261719,
"loss": 0.2759,
"objective": 0.2766547203063965,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6041666865348816,
"regularize": 0.2766547203063965,
"step": 155
},
{
"dpo_loss": 0.5983697175979614,
"epoch": 0.9069437883797827,
"grad_norm": 15.160839164653526,
"learning_rate": 9.797464868072486e-07,
"logits": -1.5286997556686401,
"logps": -89.5051498413086,
"loss": 0.2598,
"objective": 0.26661187410354614,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.574999988079071,
"regularize": 0.26661187410354614,
"step": 160
},
{
"dpo_loss": 0.6094165444374084,
"epoch": 0.9352857817666509,
"grad_norm": 13.97029915603188,
"learning_rate": 9.768584753741134e-07,
"logits": -1.5288485288619995,
"logps": -89.78202819824219,
"loss": 0.2722,
"objective": 0.2768023908138275,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2768023908138275,
"step": 165
},
{
"dpo_loss": 0.5933364033699036,
"epoch": 0.9636277751535192,
"grad_norm": 16.22461169876271,
"learning_rate": 9.737828933872073e-07,
"logits": -1.5362460613250732,
"logps": -89.80062866210938,
"loss": 0.2637,
"objective": 0.25175875425338745,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6000000238418579,
"regularize": 0.25175875425338745,
"step": 170
},
{
"dpo_loss": 0.5974230170249939,
"epoch": 0.9919697685403873,
"grad_norm": 15.239246649578357,
"learning_rate": 9.705209506155634e-07,
"logits": -1.4934788942337036,
"logps": -88.75602722167969,
"loss": 0.2634,
"objective": 0.24328327178955078,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6583333611488342,
"regularize": 0.24328327178955078,
"step": 175
},
{
"dpo_loss": 0.581199049949646,
"epoch": 1.0203117619272555,
"grad_norm": 14.774253000474259,
"learning_rate": 9.670739301325534e-07,
"logits": -1.6717146635055542,
"logps": -88.1180191040039,
"loss": 0.2291,
"objective": 0.2292441576719284,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6208333373069763,
"regularize": 0.2292441576719284,
"step": 180
},
{
"dpo_loss": 0.575748860836029,
"epoch": 1.0486537553141237,
"grad_norm": 17.006560148175247,
"learning_rate": 9.63443187811197e-07,
"logits": -1.5418639183044434,
"logps": -88.76116180419922,
"loss": 0.2333,
"objective": 0.2448757141828537,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6083333492279053,
"regularize": 0.2448757141828537,
"step": 185
},
{
"dpo_loss": 0.5784227848052979,
"epoch": 1.076995748700992,
"grad_norm": 15.70285096364104,
"learning_rate": 9.596301517908328e-07,
"logits": -1.5934358835220337,
"logps": -89.71295928955078,
"loss": 0.2135,
"objective": 0.19769170880317688,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.625,
"regularize": 0.19769169390201569,
"step": 190
},
{
"dpo_loss": 0.5825232267379761,
"epoch": 1.10533774208786,
"grad_norm": 14.396192466103011,
"learning_rate": 9.556363219153662e-07,
"logits": -1.5657871961593628,
"logps": -87.62194061279297,
"loss": 0.2125,
"objective": 0.1948522925376892,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.1948522925376892,
"step": 195
},
{
"dpo_loss": 0.5780203342437744,
"epoch": 1.1336797354747283,
"grad_norm": 14.49314507260273,
"learning_rate": 9.514632691433106e-07,
"logits": -1.6607592105865479,
"logps": -87.46318817138672,
"loss": 0.2075,
"objective": 0.20799687504768372,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.20799687504768372,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6855701804161072,
"eval_logits": -1.6490410566329956,
"eval_logps": -95.76744079589844,
"eval_loss": 0.408469021320343,
"eval_objective": 0.41199764609336853,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5175619721412659,
"eval_regularize": 0.41199764609336853,
"eval_runtime": 260.0694,
"eval_samples_per_second": 22.263,
"eval_steps_per_second": 0.931,
"step": 200
},
{
"dpo_loss": 0.5706655979156494,
"epoch": 1.1620217288615966,
"grad_norm": 15.635885064412406,
"learning_rate": 9.471126349298556e-07,
"logits": -1.6521145105361938,
"logps": -88.61875915527344,
"loss": 0.2091,
"objective": 0.2020285278558731,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5916666388511658,
"regularize": 0.2020285278558731,
"step": 205
},
{
"dpo_loss": 0.5730646252632141,
"epoch": 1.1903637222484649,
"grad_norm": 14.844650272500662,
"learning_rate": 9.425861305812081e-07,
"logits": -1.6272530555725098,
"logps": -87.98621368408203,
"loss": 0.208,
"objective": 0.2310691922903061,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5958333611488342,
"regularize": 0.2310691922903061,
"step": 210
},
{
"dpo_loss": 0.5831784009933472,
"epoch": 1.2187057156353331,
"grad_norm": 14.235853311462472,
"learning_rate": 9.378855365814557e-07,
"logits": -1.5842416286468506,
"logps": -88.42698669433594,
"loss": 0.1917,
"objective": 0.20643554627895355,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6000000238418579,
"regularize": 0.20643554627895355,
"step": 215
},
{
"dpo_loss": 0.574702799320221,
"epoch": 1.2470477090222012,
"grad_norm": 15.09350903860739,
"learning_rate": 9.330127018922193e-07,
"logits": -1.5366528034210205,
"logps": -89.38643646240234,
"loss": 0.1998,
"objective": 0.18881799280643463,
"ranking_idealized": 0.7208333611488342,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.6791666746139526,
"regularize": 0.18881799280643463,
"step": 220
},
{
"dpo_loss": 0.5676099061965942,
"epoch": 1.2753897024090695,
"grad_norm": 14.088714346139406,
"learning_rate": 9.279695432253708e-07,
"logits": -1.6117132902145386,
"logps": -90.31532287597656,
"loss": 0.2023,
"objective": 0.2032053917646408,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6458333134651184,
"regularize": 0.2032053917646408,
"step": 225
},
{
"dpo_loss": 0.5804415941238403,
"epoch": 1.3037316957959377,
"grad_norm": 14.005106959698436,
"learning_rate": 9.227580442891021e-07,
"logits": -1.5858609676361084,
"logps": -89.74004364013672,
"loss": 0.1904,
"objective": 0.188642218708992,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5916666388511658,
"regularize": 0.1886422038078308,
"step": 230
},
{
"dpo_loss": 0.5623538494110107,
"epoch": 1.3320736891828058,
"grad_norm": 15.259842003072114,
"learning_rate": 9.173802550076401e-07,
"logits": -1.6947582960128784,
"logps": -88.09205627441406,
"loss": 0.1949,
"objective": 0.19328099489212036,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6333333253860474,
"regularize": 0.19328099489212036,
"step": 235
},
{
"dpo_loss": 0.5668548345565796,
"epoch": 1.360415682569674,
"grad_norm": 14.478002369411158,
"learning_rate": 9.118382907149163e-07,
"logits": -1.6436070203781128,
"logps": -89.80839538574219,
"loss": 0.1801,
"objective": 0.1796088069677353,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.574999988079071,
"regularize": 0.1796088069677353,
"step": 240
},
{
"dpo_loss": 0.5793067812919617,
"epoch": 1.3887576759565423,
"grad_norm": 15.141311134036634,
"learning_rate": 9.061343313225087e-07,
"logits": -1.6800334453582764,
"logps": -89.72946166992188,
"loss": 0.19,
"objective": 0.1958913505077362,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6083333492279053,
"regularize": 0.1958913505077362,
"step": 245
},
{
"dpo_loss": 0.5501002669334412,
"epoch": 1.4170996693434104,
"grad_norm": 14.176924356730293,
"learning_rate": 9.002706204621802e-07,
"logits": -1.6303541660308838,
"logps": -88.4422836303711,
"loss": 0.1852,
"objective": 0.16911663115024567,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6333333253860474,
"regularize": 0.16911663115024567,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6845212578773499,
"eval_logits": -1.697658896446228,
"eval_logps": -95.10143280029297,
"eval_loss": 0.40446704626083374,
"eval_objective": 0.40795382857322693,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.40795382857322693,
"eval_runtime": 259.673,
"eval_samples_per_second": 22.297,
"eval_steps_per_second": 0.932,
"step": 250
},
{
"dpo_loss": 0.5670092701911926,
"epoch": 1.4454416627302786,
"grad_norm": 14.305797765290281,
"learning_rate": 8.942494646033554e-07,
"logits": -1.6137691736221313,
"logps": -88.44405364990234,
"loss": 0.1907,
"objective": 0.18229366838932037,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6416666507720947,
"regularize": 0.18229366838932037,
"step": 255
},
{
"dpo_loss": 0.5675135254859924,
"epoch": 1.473783656117147,
"grad_norm": 15.380169031973479,
"learning_rate": 8.880732321458784e-07,
"logits": -1.6693089008331299,
"logps": -89.93167877197266,
"loss": 0.1819,
"objective": 0.19000987708568573,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.574999988079071,
"regularize": 0.19000987708568573,
"step": 260
},
{
"dpo_loss": 0.5571501851081848,
"epoch": 1.5021256495040152,
"grad_norm": 15.264528602169092,
"learning_rate": 8.817443524884117e-07,
"logits": -1.6189838647842407,
"logps": -87.48661804199219,
"loss": 0.1865,
"objective": 0.19823399186134338,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6208333373069763,
"regularize": 0.19823399186134338,
"step": 265
},
{
"dpo_loss": 0.5573412775993347,
"epoch": 1.5304676428908834,
"grad_norm": 14.915226583029336,
"learning_rate": 8.752653150728411e-07,
"logits": -1.6289520263671875,
"logps": -89.79716491699219,
"loss": 0.1811,
"objective": 0.18896053731441498,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.612500011920929,
"regularize": 0.18896053731441498,
"step": 270
},
{
"dpo_loss": 0.5617662668228149,
"epoch": 1.5588096362777515,
"grad_norm": 14.461835236701942,
"learning_rate": 8.68638668405062e-07,
"logits": -1.6396280527114868,
"logps": -90.90117645263672,
"loss": 0.1786,
"objective": 0.16418127715587616,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.16418126225471497,
"step": 275
},
{
"dpo_loss": 0.5430324077606201,
"epoch": 1.5871516296646198,
"grad_norm": 14.442514240271057,
"learning_rate": 8.61867019052535e-07,
"logits": -1.5382746458053589,
"logps": -88.97178649902344,
"loss": 0.1774,
"objective": 0.17928847670555115,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6416666507720947,
"regularize": 0.17928847670555115,
"step": 280
},
{
"dpo_loss": 0.5505569577217102,
"epoch": 1.615493623051488,
"grad_norm": 16.245610088733493,
"learning_rate": 8.549530306190014e-07,
"logits": -1.6338722705841064,
"logps": -89.0966796875,
"loss": 0.1683,
"objective": 0.16002054512500763,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6333333253860474,
"regularize": 0.16002054512500763,
"step": 285
},
{
"dpo_loss": 0.5579850673675537,
"epoch": 1.643835616438356,
"grad_norm": 15.414341809456326,
"learning_rate": 8.478994226967638e-07,
"logits": -1.6751606464385986,
"logps": -88.0530776977539,
"loss": 0.1719,
"objective": 0.1747826188802719,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6541666388511658,
"regularize": 0.17478260397911072,
"step": 290
},
{
"dpo_loss": 0.5509874224662781,
"epoch": 1.6721776098252243,
"grad_norm": 13.923449942826227,
"learning_rate": 8.407089697969456e-07,
"logits": -1.5533292293548584,
"logps": -87.6349868774414,
"loss": 0.1765,
"objective": 0.17216673493385315,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6166666746139526,
"regularize": 0.17216673493385315,
"step": 295
},
{
"dpo_loss": 0.5574812293052673,
"epoch": 1.7005196032120926,
"grad_norm": 15.451129216726908,
"learning_rate": 8.333845002581458e-07,
"logits": -1.6305700540542603,
"logps": -88.86536407470703,
"loss": 0.172,
"objective": 0.19959108531475067,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6625000238418579,
"regularize": 0.19959108531475067,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6843295097351074,
"eval_logits": -1.640322208404541,
"eval_logps": -95.94416809082031,
"eval_loss": 0.4054754078388214,
"eval_objective": 0.4098014831542969,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.4098014831542969,
"eval_runtime": 260.124,
"eval_samples_per_second": 22.259,
"eval_steps_per_second": 0.93,
"step": 300
},
{
"dpo_loss": 0.5613775849342346,
"epoch": 1.7288615965989607,
"grad_norm": 14.905752064782224,
"learning_rate": 8.259288951339232e-07,
"logits": -1.60408616065979,
"logps": -89.15306091308594,
"loss": 0.1652,
"objective": 0.1750606745481491,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.612500011920929,
"regularize": 0.1750606745481491,
"step": 305
},
{
"dpo_loss": 0.5524806380271912,
"epoch": 1.7572035899858292,
"grad_norm": 15.166116258201686,
"learning_rate": 8.183450870595441e-07,
"logits": -1.6861586570739746,
"logps": -87.07334899902344,
"loss": 0.1671,
"objective": 0.1604899913072586,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6416666507720947,
"regularize": 0.1604899913072586,
"step": 310
},
{
"dpo_loss": 0.5647029280662537,
"epoch": 1.7855455833726972,
"grad_norm": 14.218932923228996,
"learning_rate": 8.106360590984404e-07,
"logits": -1.6006724834442139,
"logps": -87.93310546875,
"loss": 0.165,
"objective": 0.16339111328125,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.612500011920929,
"regularize": 0.1633910983800888,
"step": 315
},
{
"dpo_loss": 0.5607351064682007,
"epoch": 1.8138875767595655,
"grad_norm": 14.843987076974228,
"learning_rate": 8.028048435688333e-07,
"logits": -1.6807405948638916,
"logps": -90.42388153076172,
"loss": 0.1542,
"objective": 0.15479125082492828,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6333333253860474,
"regularize": 0.15479125082492828,
"step": 320
},
{
"dpo_loss": 0.5438184142112732,
"epoch": 1.8422295701464337,
"grad_norm": 15.354095003208785,
"learning_rate": 7.948545208509811e-07,
"logits": -1.7393078804016113,
"logps": -92.29595184326172,
"loss": 0.1626,
"objective": 0.15665055811405182,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6791666746139526,
"regularize": 0.15665055811405182,
"step": 325
},
{
"dpo_loss": 0.5477765798568726,
"epoch": 1.8705715635333018,
"grad_norm": 14.393471500808245,
"learning_rate": 7.86788218175523e-07,
"logits": -1.4602495431900024,
"logps": -89.03805541992188,
"loss": 0.1588,
"objective": 0.15682725608348846,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6166666746139526,
"regularize": 0.15682725608348846,
"step": 330
},
{
"dpo_loss": 0.562004029750824,
"epoch": 1.89891355692017,
"grad_norm": 14.386579659864832,
"learning_rate": 7.786091083933949e-07,
"logits": -1.6249334812164307,
"logps": -88.8255844116211,
"loss": 0.1531,
"objective": 0.14314964413642883,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.612500011920929,
"regularize": 0.14314964413642883,
"step": 335
},
{
"dpo_loss": 0.5628435015678406,
"epoch": 1.9272555503070383,
"grad_norm": 14.464107878774616,
"learning_rate": 7.703204087277988e-07,
"logits": -1.6546257734298706,
"logps": -90.56217956542969,
"loss": 0.1527,
"objective": 0.1476096212863922,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6666666865348816,
"regularize": 0.1476096212863922,
"step": 340
},
{
"dpo_loss": 0.5599310994148254,
"epoch": 1.9555975436939064,
"grad_norm": 14.799147392008404,
"learning_rate": 7.619253795087208e-07,
"logits": -1.647698998451233,
"logps": -89.57904815673828,
"loss": 0.1417,
"objective": 0.13018347322940826,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6166666746139526,
"regularize": 0.13018347322940826,
"step": 345
},
{
"dpo_loss": 0.5558927655220032,
"epoch": 1.9839395370807746,
"grad_norm": 14.874212978278477,
"learning_rate": 7.534273228904915e-07,
"logits": -1.5429824590682983,
"logps": -90.79412078857422,
"loss": 0.1504,
"objective": 0.14253783226013184,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.6000000238418579,
"regularize": 0.14253783226013184,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6839829683303833,
"eval_logits": -1.6734960079193115,
"eval_logps": -96.38378143310547,
"eval_loss": 0.4065950810909271,
"eval_objective": 0.40939342975616455,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 0.40939342975616455,
"eval_runtime": 259.5874,
"eval_samples_per_second": 22.305,
"eval_steps_per_second": 0.932,
"step": 350
},
{
"dpo_loss": 0.5446032285690308,
"epoch": 2.012281530467643,
"grad_norm": 14.92352390433015,
"learning_rate": 7.448295815528956e-07,
"logits": -1.5243901014328003,
"logps": -89.14817810058594,
"loss": 0.1469,
"objective": 0.13605408370494843,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6708333492279053,
"regularize": 0.13605406880378723,
"step": 355
},
{
"dpo_loss": 0.5358213186264038,
"epoch": 2.040623523854511,
"grad_norm": 17.820965696851154,
"learning_rate": 7.361355373863413e-07,
"logits": -1.6093335151672363,
"logps": -88.62029266357422,
"loss": 0.1269,
"objective": 0.13526426255702972,
"ranking_idealized": 0.7250000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.699999988079071,
"regularize": 0.13526426255702972,
"step": 360
},
{
"dpo_loss": 0.5384168028831482,
"epoch": 2.0689655172413794,
"grad_norm": 16.00523341015884,
"learning_rate": 7.273486101616056e-07,
"logits": -1.6718920469284058,
"logps": -90.12538146972656,
"loss": 0.1321,
"objective": 0.12407148629426956,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6708333492279053,
"regularize": 0.12407147884368896,
"step": 365
},
{
"dpo_loss": 0.5496992468833923,
"epoch": 2.0973075106282475,
"grad_norm": 14.939079070952808,
"learning_rate": 7.184722561846797e-07,
"logits": -1.6580688953399658,
"logps": -87.11526489257812,
"loss": 0.1168,
"objective": 0.12028197199106216,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6208333373069763,
"regularize": 0.12028196454048157,
"step": 370
},
{
"dpo_loss": 0.5366081595420837,
"epoch": 2.1256495040151155,
"grad_norm": 14.800979136941399,
"learning_rate": 7.095099669372443e-07,
"logits": -1.6352336406707764,
"logps": -89.77750396728516,
"loss": 0.1247,
"objective": 0.11821580678224564,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.7083333134651184,
"regularize": 0.11821580678224564,
"step": 375
},
{
"dpo_loss": 0.5499475002288818,
"epoch": 2.153991497401984,
"grad_norm": 13.76934472823536,
"learning_rate": 7.004652677033068e-07,
"logits": -1.5623695850372314,
"logps": -88.50102996826172,
"loss": 0.1232,
"objective": 0.10588161647319794,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10588161647319794,
"step": 380
},
{
"dpo_loss": 0.5542941689491272,
"epoch": 2.182333490788852,
"grad_norm": 14.633555067363714,
"learning_rate": 6.913417161825449e-07,
"logits": -1.6838804483413696,
"logps": -89.8261947631836,
"loss": 0.1154,
"objective": 0.09328292310237885,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6416666507720947,
"regularize": 0.09328292310237885,
"step": 385
},
{
"dpo_loss": 0.5482229590415955,
"epoch": 2.21067548417572,
"grad_norm": 15.478260370876582,
"learning_rate": 6.821429010908971e-07,
"logits": -1.559071660041809,
"logps": -89.70718383789062,
"loss": 0.121,
"objective": 0.11915615200996399,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5958333611488342,
"regularize": 0.1191561371088028,
"step": 390
},
{
"dpo_loss": 0.5234766006469727,
"epoch": 2.2390174775625886,
"grad_norm": 14.815951181180777,
"learning_rate": 6.728724407489553e-07,
"logits": -1.5655180215835571,
"logps": -90.58793640136719,
"loss": 0.1231,
"objective": 0.11187195777893066,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6833333373069763,
"regularize": 0.11187195777893066,
"step": 395
},
{
"dpo_loss": 0.5324122309684753,
"epoch": 2.2673594709494567,
"grad_norm": 15.066758948045832,
"learning_rate": 6.635339816587108e-07,
"logits": -1.6402709484100342,
"logps": -90.86196899414062,
"loss": 0.1241,
"objective": 0.12945452332496643,
"ranking_idealized": 0.7583333253860474,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.7208333611488342,
"regularize": 0.12945450842380524,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6844344735145569,
"eval_logits": -1.6892694234848022,
"eval_logps": -95.98338317871094,
"eval_loss": 0.40758705139160156,
"eval_objective": 0.411220520734787,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.411220520734787,
"eval_runtime": 261.5788,
"eval_samples_per_second": 22.135,
"eval_steps_per_second": 0.925,
"step": 400
},
{
"dpo_loss": 0.5428405404090881,
"epoch": 2.295701464336325,
"grad_norm": 15.346220951371636,
"learning_rate": 6.541311970692162e-07,
"logits": -1.7154464721679688,
"logps": -88.2754135131836,
"loss": 0.1221,
"objective": 0.13386504352092743,
"ranking_idealized": 0.737500011920929,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6666666865348816,
"regularize": 0.13386504352092743,
"step": 405
},
{
"dpo_loss": 0.5366548895835876,
"epoch": 2.324043457723193,
"grad_norm": 15.316836223077656,
"learning_rate": 6.446677855317264e-07,
"logits": -1.5751118659973145,
"logps": -88.22239685058594,
"loss": 0.1133,
"objective": 0.10883895307779312,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.625,
"regularize": 0.10883894562721252,
"step": 410
},
{
"dpo_loss": 0.5400758385658264,
"epoch": 2.3523854511100613,
"grad_norm": 15.140337757347018,
"learning_rate": 6.351474694448864e-07,
"logits": -1.6428126096725464,
"logps": -88.48053741455078,
"loss": 0.1166,
"objective": 0.1268073171377182,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.637499988079071,
"regularize": 0.1268073171377182,
"step": 415
},
{
"dpo_loss": 0.5541034936904907,
"epoch": 2.3807274444969297,
"grad_norm": 14.807008378400798,
"learning_rate": 6.255739935905395e-07,
"logits": -1.5306849479675293,
"logps": -90.66093444824219,
"loss": 0.1148,
"objective": 0.11297421902418137,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.612500011920929,
"regularize": 0.11297421902418137,
"step": 420
},
{
"dpo_loss": 0.5390843152999878,
"epoch": 2.409069437883798,
"grad_norm": 14.236861276355002,
"learning_rate": 6.159511236607315e-07,
"logits": -1.5861257314682007,
"logps": -88.8576431274414,
"loss": 0.1119,
"objective": 0.112046018242836,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.637499988079071,
"regularize": 0.1120460107922554,
"step": 425
},
{
"dpo_loss": 0.5440212488174438,
"epoch": 2.4374114312706663,
"grad_norm": 14.636334382365337,
"learning_rate": 6.062826447764883e-07,
"logits": -1.6188839673995972,
"logps": -90.33170318603516,
"loss": 0.1066,
"objective": 0.09743823856115341,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6333333253860474,
"regularize": 0.09743823856115341,
"step": 430
},
{
"dpo_loss": 0.5423538088798523,
"epoch": 2.4657534246575343,
"grad_norm": 14.885310195485024,
"learning_rate": 5.965723599989528e-07,
"logits": -1.6494262218475342,
"logps": -90.86329650878906,
"loss": 0.1193,
"objective": 0.1321849673986435,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6291666626930237,
"regularize": 0.1321849673986435,
"step": 435
},
{
"dpo_loss": 0.5417699813842773,
"epoch": 2.4940954180444024,
"grad_norm": 15.13405575560551,
"learning_rate": 5.868240888334652e-07,
"logits": -1.5878384113311768,
"logps": -89.1788330078125,
"loss": 0.1086,
"objective": 0.10985619574785233,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.10985619574785233,
"step": 440
},
{
"dpo_loss": 0.5431756377220154,
"epoch": 2.5224374114312704,
"grad_norm": 14.711454767729204,
"learning_rate": 5.770416657271728e-07,
"logits": -1.616743803024292,
"logps": -86.75231170654297,
"loss": 0.1097,
"objective": 0.11407394707202911,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6833333373069763,
"regularize": 0.11407394707202911,
"step": 445
},
{
"dpo_loss": 0.5639354586601257,
"epoch": 2.550779404818139,
"grad_norm": 13.941250536582869,
"learning_rate": 5.67228938560766e-07,
"logits": -1.6488417387008667,
"logps": -89.29744720458984,
"loss": 0.1083,
"objective": 0.1078432947397232,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6333333253860474,
"regularize": 0.1078432947397232,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6838362216949463,
"eval_logits": -1.681365728378296,
"eval_logps": -96.42753601074219,
"eval_loss": 0.40609824657440186,
"eval_objective": 0.40940061211586,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 0.40940061211586,
"eval_runtime": 259.9201,
"eval_samples_per_second": 22.276,
"eval_steps_per_second": 0.931,
"step": 450
},
{
"dpo_loss": 0.5353375673294067,
"epoch": 2.579121398205007,
"grad_norm": 14.879466038094796,
"learning_rate": 5.573897671349268e-07,
"logits": -1.6768704652786255,
"logps": -89.45830535888672,
"loss": 0.1065,
"objective": 0.11618896573781967,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6666666865348816,
"regularize": 0.11618896573781967,
"step": 455
},
{
"dpo_loss": 0.5305708646774292,
"epoch": 2.6074633915918755,
"grad_norm": 14.078420242787786,
"learning_rate": 5.475280216520912e-07,
"logits": -1.7477765083312988,
"logps": -89.77506256103516,
"loss": 0.1089,
"objective": 0.1040702536702156,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6458333134651184,
"regularize": 0.10407023876905441,
"step": 460
},
{
"dpo_loss": 0.5392042994499207,
"epoch": 2.6358053849787435,
"grad_norm": 14.620942850653412,
"learning_rate": 5.376475811941191e-07,
"logits": -1.6256554126739502,
"logps": -89.18009185791016,
"loss": 0.1057,
"objective": 0.11545146256685257,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6000000238418579,
"regularize": 0.11545146256685257,
"step": 465
},
{
"dpo_loss": 0.5424565076828003,
"epoch": 2.6641473783656116,
"grad_norm": 15.533051216052591,
"learning_rate": 5.277523321964701e-07,
"logits": -1.6505295038223267,
"logps": -88.97268676757812,
"loss": 0.102,
"objective": 0.11157210916280746,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.637499988079071,
"regularize": 0.11157210916280746,
"step": 470
},
{
"dpo_loss": 0.5482778549194336,
"epoch": 2.69248937175248,
"grad_norm": 14.564992109935103,
"learning_rate": 5.178461669194903e-07,
"logits": -1.5990585088729858,
"logps": -87.48114776611328,
"loss": 0.1038,
"objective": 0.10224457085132599,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6499999761581421,
"regularize": 0.10224457085132599,
"step": 475
},
{
"dpo_loss": 0.5498027205467224,
"epoch": 2.720831365139348,
"grad_norm": 15.14165653036781,
"learning_rate": 5.07932981917404e-07,
"logits": -1.686042070388794,
"logps": -86.90522766113281,
"loss": 0.1017,
"objective": 0.10218793898820877,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10218793898820877,
"step": 480
},
{
"dpo_loss": 0.5342879891395569,
"epoch": 2.7491733585262166,
"grad_norm": 14.745717143899025,
"learning_rate": 4.980166765056193e-07,
"logits": -1.592150330543518,
"logps": -89.95211791992188,
"loss": 0.1028,
"objective": 0.09829958528280258,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6875,
"regularize": 0.09829958528280258,
"step": 485
},
{
"dpo_loss": 0.5277873873710632,
"epoch": 2.7775153519130846,
"grad_norm": 15.397799376978266,
"learning_rate": 4.881011512269463e-07,
"logits": -1.6200255155563354,
"logps": -87.80931091308594,
"loss": 0.1078,
"objective": 0.11417750269174576,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6541666388511658,
"regularize": 0.11417750269174576,
"step": 490
},
{
"dpo_loss": 0.5365422368049622,
"epoch": 2.8058573452999527,
"grad_norm": 14.75494539326366,
"learning_rate": 4.78190306317332e-07,
"logits": -1.631138563156128,
"logps": -87.2429428100586,
"loss": 0.0967,
"objective": 0.0883258655667305,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.625,
"regularize": 0.0883258506655693,
"step": 495
},
{
"dpo_loss": 0.5285528302192688,
"epoch": 2.8341993386868207,
"grad_norm": 14.831004301691246,
"learning_rate": 4.682880401717177e-07,
"logits": -1.6615116596221924,
"logps": -86.21328735351562,
"loss": 0.0989,
"objective": 0.0958656519651413,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6708333492279053,
"regularize": 0.0958656445145607,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6844313144683838,
"eval_logits": -1.6797000169754028,
"eval_logps": -95.7645034790039,
"eval_loss": 0.4076074957847595,
"eval_objective": 0.4115086793899536,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5175619721412659,
"eval_regularize": 0.4115086793899536,
"eval_runtime": 263.4539,
"eval_samples_per_second": 21.977,
"eval_steps_per_second": 0.919,
"step": 500
},
{
"dpo_loss": 0.5365604758262634,
"epoch": 2.862541332073689,
"grad_norm": 15.368802903715196,
"learning_rate": 4.5839824781061886e-07,
"logits": -1.5910155773162842,
"logps": -88.20460510253906,
"loss": 0.0982,
"objective": 0.10264171659946442,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6708333492279053,
"regularize": 0.10264171659946442,
"step": 505
},
{
"dpo_loss": 0.5401506423950195,
"epoch": 2.8908833254605573,
"grad_norm": 14.750995001535633,
"learning_rate": 4.4852481934803277e-07,
"logits": -1.546883463859558,
"logps": -88.82188415527344,
"loss": 0.0938,
"objective": 0.09417784959077835,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6499999761581421,
"regularize": 0.09417784959077835,
"step": 510
},
{
"dpo_loss": 0.5448324084281921,
"epoch": 2.9192253188474258,
"grad_norm": 14.256889048024082,
"learning_rate": 4.3867163846127674e-07,
"logits": -1.7348732948303223,
"logps": -88.37023162841797,
"loss": 0.0959,
"objective": 0.09633953124284744,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6541666388511658,
"regularize": 0.09633953124284744,
"step": 515
},
{
"dpo_loss": 0.5417373776435852,
"epoch": 2.947567312234294,
"grad_norm": 15.293723935135468,
"learning_rate": 4.2884258086335745e-07,
"logits": -1.6043357849121094,
"logps": -90.03458404541016,
"loss": 0.0903,
"objective": 0.08684458583593369,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6416666507720947,
"regularize": 0.08684458583593369,
"step": 520
},
{
"dpo_loss": 0.5415874719619751,
"epoch": 2.975909305621162,
"grad_norm": 14.348085330611658,
"learning_rate": 4.1904151277847305e-07,
"logits": -1.616308569908142,
"logps": -89.7189712524414,
"loss": 0.0923,
"objective": 0.10915852338075638,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.675000011920929,
"regularize": 0.10915852338075638,
"step": 525
},
{
"dpo_loss": 0.5288244485855103,
"epoch": 3.0042512990080303,
"grad_norm": 14.702772615303738,
"learning_rate": 4.092722894212487e-07,
"logits": -1.6195777654647827,
"logps": -89.2164535522461,
"loss": 0.0961,
"objective": 0.10630013048648834,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6541666388511658,
"regularize": 0.10630011558532715,
"step": 530
},
{
"dpo_loss": 0.5415112376213074,
"epoch": 3.0325932923948984,
"grad_norm": 14.653134844595096,
"learning_rate": 3.995387534803005e-07,
"logits": -1.6762583255767822,
"logps": -90.93753051757812,
"loss": 0.0853,
"objective": 0.09499659389257431,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6625000238418579,
"regularize": 0.09499659389257431,
"step": 535
},
{
"dpo_loss": 0.5392681956291199,
"epoch": 3.0609352857817664,
"grad_norm": 15.143937078082786,
"learning_rate": 3.8984473360672967e-07,
"logits": -1.7662190198898315,
"logps": -88.38541412353516,
"loss": 0.0781,
"objective": 0.06802941113710403,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6458333134651184,
"regularize": 0.06802940368652344,
"step": 540
},
{
"dpo_loss": 0.5409398674964905,
"epoch": 3.089277279168635,
"grad_norm": 14.949184489866422,
"learning_rate": 3.801940429081345e-07,
"logits": -1.6874436140060425,
"logps": -89.9101791381836,
"loss": 0.0793,
"objective": 0.08502420783042908,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.612500011920929,
"regularize": 0.08502420783042908,
"step": 545
},
{
"dpo_loss": 0.530200719833374,
"epoch": 3.117619272555503,
"grad_norm": 15.128964804198388,
"learning_rate": 3.7059047744873955e-07,
"logits": -1.5058013200759888,
"logps": -89.7841567993164,
"loss": 0.0857,
"objective": 0.08006270974874496,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.699999988079071,
"regularize": 0.08006270974874496,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.6840906143188477,
"eval_logits": -1.6864055395126343,
"eval_logps": -96.7056655883789,
"eval_loss": 0.40704044699668884,
"eval_objective": 0.41078415513038635,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 0.41078415513038635,
"eval_runtime": 259.6327,
"eval_samples_per_second": 22.301,
"eval_steps_per_second": 0.932,
"step": 550
},
{
"dpo_loss": 0.5227282047271729,
"epoch": 3.1459612659423715,
"grad_norm": 15.414705329848626,
"learning_rate": 3.6103781475622786e-07,
"logits": -1.6148103475570679,
"logps": -90.73031616210938,
"loss": 0.0822,
"objective": 0.08249496668577194,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.675000011920929,
"regularize": 0.08249495923519135,
"step": 555
},
{
"dpo_loss": 0.531684935092926,
"epoch": 3.1743032593292395,
"grad_norm": 15.644698799260503,
"learning_rate": 3.5153981233586274e-07,
"logits": -1.5928341150283813,
"logps": -88.10628509521484,
"loss": 0.0755,
"objective": 0.0785578116774559,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6541666388511658,
"regularize": 0.07855779677629471,
"step": 560
},
{
"dpo_loss": 0.5370542407035828,
"epoch": 3.2026452527161076,
"grad_norm": 16.61476983622863,
"learning_rate": 3.421002061924876e-07,
"logits": -1.6470757722854614,
"logps": -88.91401672363281,
"loss": 0.078,
"objective": 0.08550135046243668,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6666666865348816,
"regularize": 0.08550134301185608,
"step": 565
},
{
"dpo_loss": 0.5315267443656921,
"epoch": 3.230987246102976,
"grad_norm": 16.219608431973352,
"learning_rate": 3.327227093609824e-07,
"logits": -1.5615371465682983,
"logps": -88.98600006103516,
"loss": 0.0804,
"objective": 0.08551181107759476,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6208333373069763,
"regularize": 0.08551178872585297,
"step": 570
},
{
"dpo_loss": 0.5380645990371704,
"epoch": 3.259329239489844,
"grad_norm": 15.599509722839054,
"learning_rate": 3.234110104457536e-07,
"logits": -1.65503990650177,
"logps": -89.4914321899414,
"loss": 0.0761,
"objective": 0.08411499112844467,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6708333492279053,
"regularize": 0.08411499112844467,
"step": 575
},
{
"dpo_loss": 0.5447063446044922,
"epoch": 3.287671232876712,
"grad_norm": 14.709745903577328,
"learning_rate": 3.141687721698363e-07,
"logits": -1.6440101861953735,
"logps": -90.87422943115234,
"loss": 0.0766,
"objective": 0.06781290471553802,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6499999761581421,
"regularize": 0.06781289726495743,
"step": 580
},
{
"dpo_loss": 0.544808566570282,
"epoch": 3.3160132262635806,
"grad_norm": 14.95898016236234,
"learning_rate": 3.049996299341742e-07,
"logits": -1.67787766456604,
"logps": -90.5733871459961,
"loss": 0.077,
"objective": 0.07407450675964355,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6791666746139526,
"regularize": 0.07407449930906296,
"step": 585
},
{
"dpo_loss": 0.5435771346092224,
"epoch": 3.3443552196504487,
"grad_norm": 16.129826241390184,
"learning_rate": 2.959071903876486e-07,
"logits": -1.689025640487671,
"logps": -90.51628875732422,
"loss": 0.0756,
"objective": 0.060641638934612274,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6541666388511658,
"regularize": 0.06064163148403168,
"step": 590
},
{
"dpo_loss": 0.5342642664909363,
"epoch": 3.372697213037317,
"grad_norm": 15.00734617596817,
"learning_rate": 2.86895030008416e-07,
"logits": -1.6080927848815918,
"logps": -89.73548126220703,
"loss": 0.0743,
"objective": 0.07631801813840866,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6416666507720947,
"regularize": 0.07631801068782806,
"step": 595
},
{
"dpo_loss": 0.517924964427948,
"epoch": 3.4010392064241852,
"grad_norm": 14.448635430187423,
"learning_rate": 2.779666936971129e-07,
"logits": -1.6234545707702637,
"logps": -89.56751251220703,
"loss": 0.0723,
"objective": 0.07954961806535721,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6625000238418579,
"regularize": 0.07954961806535721,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.6840184330940247,
"eval_logits": -1.6933923959732056,
"eval_logps": -96.77139282226562,
"eval_loss": 0.4082699120044708,
"eval_objective": 0.41121208667755127,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.41121208667755127,
"eval_runtime": 261.2463,
"eval_samples_per_second": 22.163,
"eval_steps_per_second": 0.926,
"step": 600
},
{
"dpo_loss": 0.544577956199646,
"epoch": 3.4293811998110533,
"grad_norm": 15.344467617289204,
"learning_rate": 2.6912569338248315e-07,
"logits": -1.677703857421875,
"logps": -91.40158081054688,
"loss": 0.0712,
"objective": 0.07372996211051941,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6541666388511658,
"regularize": 0.07372996211051941,
"step": 605
},
{
"dpo_loss": 0.5388127565383911,
"epoch": 3.4577231931979218,
"grad_norm": 14.865347998745246,
"learning_rate": 2.603755066399718e-07,
"logits": -1.6282384395599365,
"logps": -89.45064544677734,
"loss": 0.0716,
"objective": 0.06525908410549164,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6541666388511658,
"regularize": 0.06525907665491104,
"step": 610
},
{
"dpo_loss": 0.5260815620422363,
"epoch": 3.48606518658479,
"grad_norm": 13.789444853566813,
"learning_rate": 2.517195753238345e-07,
"logits": -1.6580389738082886,
"logps": -90.04885864257812,
"loss": 0.071,
"objective": 0.0720449835062027,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.6041666865348816,
"regularize": 0.0720449835062027,
"step": 615
},
{
"dpo_loss": 0.5217207670211792,
"epoch": 3.514407179971658,
"grad_norm": 13.823586682790467,
"learning_rate": 2.4316130421329696e-07,
"logits": -1.5293523073196411,
"logps": -89.2181625366211,
"loss": 0.071,
"objective": 0.06358211487531662,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6333333253860474,
"regularize": 0.06358211487531662,
"step": 620
},
{
"dpo_loss": 0.5324966311454773,
"epoch": 3.5427491733585263,
"grad_norm": 15.423238852250835,
"learning_rate": 2.3470405967329604e-07,
"logits": -1.6097266674041748,
"logps": -87.8069076538086,
"loss": 0.0677,
"objective": 0.0661839172244072,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6625000238418579,
"regularize": 0.0661839097738266,
"step": 625
},
{
"dpo_loss": 0.548510730266571,
"epoch": 3.5710911667453944,
"grad_norm": 14.649308039914828,
"learning_rate": 2.2635116833033392e-07,
"logits": -1.6878260374069214,
"logps": -88.3050765991211,
"loss": 0.0735,
"objective": 0.07757600396871567,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6333333253860474,
"regularize": 0.07757599651813507,
"step": 630
},
{
"dpo_loss": 0.5425411462783813,
"epoch": 3.5994331601322624,
"grad_norm": 15.066685089968594,
"learning_rate": 2.181059157639598e-07,
"logits": -1.6372451782226562,
"logps": -88.1607666015625,
"loss": 0.0663,
"objective": 0.06245482712984085,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6208333373069763,
"regularize": 0.06245479732751846,
"step": 635
},
{
"dpo_loss": 0.5177238583564758,
"epoch": 3.627775153519131,
"grad_norm": 14.538482874181112,
"learning_rate": 2.0997154521440097e-07,
"logits": -1.565827488899231,
"logps": -89.97610473632812,
"loss": 0.0639,
"objective": 0.07200702279806137,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.6166666746139526,
"regularize": 0.07200699299573898,
"step": 640
},
{
"dpo_loss": 0.5268372893333435,
"epoch": 3.656117146905999,
"grad_norm": 14.714394354372583,
"learning_rate": 2.0195125630684428e-07,
"logits": -1.5924190282821655,
"logps": -87.3132553100586,
"loss": 0.0666,
"objective": 0.06948961317539215,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6416666507720947,
"regularize": 0.06948959827423096,
"step": 645
},
{
"dpo_loss": 0.5451396703720093,
"epoch": 3.6844591402928675,
"grad_norm": 14.392341763219553,
"learning_rate": 1.9404820379287672e-07,
"logits": -1.5905060768127441,
"logps": -88.871337890625,
"loss": 0.0603,
"objective": 0.05459137260913849,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.637499988079071,
"regularize": 0.05459136515855789,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.6846064329147339,
"eval_logits": -1.6889305114746094,
"eval_logps": -95.68582916259766,
"eval_loss": 0.40852683782577515,
"eval_objective": 0.4125712513923645,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.4125712513923645,
"eval_runtime": 260.1365,
"eval_samples_per_second": 22.258,
"eval_steps_per_second": 0.93,
"step": 650
},
{
"dpo_loss": 0.5369049906730652,
"epoch": 3.7128011336797355,
"grad_norm": 14.716721190910569,
"learning_rate": 1.8626549630957395e-07,
"logits": -1.6153782606124878,
"logps": -88.33747100830078,
"loss": 0.0672,
"objective": 0.061821334064006805,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6458333134651184,
"regularize": 0.06182133033871651,
"step": 655
},
{
"dpo_loss": 0.5374411344528198,
"epoch": 3.7411431270666036,
"grad_norm": 14.492926045642323,
"learning_rate": 1.7860619515673032e-07,
"logits": -1.7408215999603271,
"logps": -88.87337493896484,
"loss": 0.0663,
"objective": 0.06822895258665085,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6833333373069763,
"regularize": 0.06822895258665085,
"step": 660
},
{
"dpo_loss": 0.5354861617088318,
"epoch": 3.769485120453472,
"grad_norm": 15.39617438199354,
"learning_rate": 1.7107331309270684e-07,
"logits": -1.6346544027328491,
"logps": -88.30725860595703,
"loss": 0.0664,
"objective": 0.06315968185663223,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.675000011920929,
"regularize": 0.06315967440605164,
"step": 665
},
{
"dpo_loss": 0.5363784432411194,
"epoch": 3.79782711384034,
"grad_norm": 15.749195791822254,
"learning_rate": 1.6366981314937372e-07,
"logits": -1.7264405488967896,
"logps": -88.70533752441406,
"loss": 0.0642,
"objective": 0.07602261006832123,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.7333333492279053,
"regularize": 0.07602259516716003,
"step": 670
},
{
"dpo_loss": 0.5361024737358093,
"epoch": 3.826169107227208,
"grad_norm": 14.754855609421504,
"learning_rate": 1.5639860746661338e-07,
"logits": -1.674873948097229,
"logps": -87.9578857421875,
"loss": 0.0639,
"objective": 0.057048484683036804,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6583333611488342,
"regularize": 0.057048480957746506,
"step": 675
},
{
"dpo_loss": 0.546769917011261,
"epoch": 3.8545111006140766,
"grad_norm": 16.28642539144059,
"learning_rate": 1.492625561468393e-07,
"logits": -1.6343268156051636,
"logps": -88.65126037597656,
"loss": 0.0635,
"objective": 0.0622069351375103,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6291666626930237,
"regularize": 0.062206920236349106,
"step": 680
},
{
"dpo_loss": 0.5258111357688904,
"epoch": 3.8828530940009447,
"grad_norm": 15.221617480212148,
"learning_rate": 1.4226446612998671e-07,
"logits": -1.7157925367355347,
"logps": -87.88172149658203,
"loss": 0.0595,
"objective": 0.051657140254974365,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6791666746139526,
"regularize": 0.05165712162852287,
"step": 685
},
{
"dpo_loss": 0.5301392674446106,
"epoch": 3.9111950873878127,
"grad_norm": 14.71114521040863,
"learning_rate": 1.3540709008941147e-07,
"logits": -1.64604651927948,
"logps": -86.96089935302734,
"loss": 0.0627,
"objective": 0.05803840234875679,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6875,
"regularize": 0.05803839489817619,
"step": 690
},
{
"dpo_loss": 0.5404530763626099,
"epoch": 3.9395370807746812,
"grad_norm": 14.300468193938293,
"learning_rate": 1.2869312534913685e-07,
"logits": -1.5944476127624512,
"logps": -88.3801498413086,
"loss": 0.065,
"objective": 0.06985396891832352,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.6041666865348816,
"regularize": 0.06985396891832352,
"step": 695
},
{
"dpo_loss": 0.5349648594856262,
"epoch": 3.9678790741615493,
"grad_norm": 14.848538983349796,
"learning_rate": 1.2212521282287093e-07,
"logits": -1.640931487083435,
"logps": -88.85566711425781,
"loss": 0.0658,
"objective": 0.06192382797598839,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6083333492279053,
"regularize": 0.06192382052540779,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.6843095421791077,
"eval_logits": -1.6961629390716553,
"eval_logps": -95.92644500732422,
"eval_loss": 0.40860193967819214,
"eval_objective": 0.41189059615135193,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.41189059615135193,
"eval_runtime": 260.0344,
"eval_samples_per_second": 22.266,
"eval_steps_per_second": 0.931,
"step": 700
},
{
"dpo_loss": 0.5312646627426147,
"epoch": 3.9962210675484178,
"grad_norm": 14.809174954752928,
"learning_rate": 1.15705935975212e-07,
"logits": -1.5398000478744507,
"logps": -87.27682495117188,
"loss": 0.065,
"objective": 0.06780902296304703,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6625000238418579,
"regularize": 0.06780902296304703,
"step": 705
},
{
"dpo_loss": 0.5272520780563354,
"epoch": 4.024563060935286,
"grad_norm": 15.262991388495523,
"learning_rate": 1.094378198054533e-07,
"logits": -1.63226318359375,
"logps": -88.91695404052734,
"loss": 0.0593,
"objective": 0.0699063390493393,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6499999761581421,
"regularize": 0.0699063315987587,
"step": 710
},
{
"dpo_loss": 0.5363659262657166,
"epoch": 4.052905054322154,
"grad_norm": 16.13258446285675,
"learning_rate": 1.0332332985438247e-07,
"logits": -1.5873126983642578,
"logps": -88.78561401367188,
"loss": 0.0526,
"objective": 0.05241125822067261,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6666666865348816,
"regularize": 0.052411243319511414,
"step": 715
},
{
"dpo_loss": 0.521473228931427,
"epoch": 4.081247047709022,
"grad_norm": 15.52892462347426,
"learning_rate": 9.736487123447068e-08,
"logits": -1.499315619468689,
"logps": -91.1855697631836,
"loss": 0.0526,
"objective": 0.04763669893145561,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6458333134651184,
"regularize": 0.04763669893145561,
"step": 720
},
{
"dpo_loss": 0.5466646552085876,
"epoch": 4.109589041095891,
"grad_norm": 15.248617054291332,
"learning_rate": 9.156478768383058e-08,
"logits": -1.6146501302719116,
"logps": -88.77969360351562,
"loss": 0.0565,
"objective": 0.05335882678627968,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6416666507720947,
"regularize": 0.05335881933569908,
"step": 725
},
{
"dpo_loss": 0.5291512608528137,
"epoch": 4.137931034482759,
"grad_norm": 15.105310283399879,
"learning_rate": 8.592536064431466e-08,
"logits": -1.687792181968689,
"logps": -88.5714340209961,
"loss": 0.0564,
"objective": 0.05620870739221573,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6583333611488342,
"regularize": 0.05620870366692543,
"step": 730
},
{
"dpo_loss": 0.5384243130683899,
"epoch": 4.166273027869627,
"grad_norm": 15.990640609108542,
"learning_rate": 8.044880836411888e-08,
"logits": -1.5998725891113281,
"logps": -89.7979736328125,
"loss": 0.0511,
"objective": 0.047256775200366974,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6791666746139526,
"regularize": 0.04725676402449608,
"step": 735
},
{
"dpo_loss": 0.5150901079177856,
"epoch": 4.194615021256495,
"grad_norm": 14.62250055664198,
"learning_rate": 7.513728502524286e-08,
"logits": -1.5944503545761108,
"logps": -89.62586212158203,
"loss": 0.0534,
"objective": 0.05004221946001053,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6708333492279053,
"regularize": 0.05004219710826874,
"step": 740
},
{
"dpo_loss": 0.5289852023124695,
"epoch": 4.222957014643363,
"grad_norm": 15.558949956501518,
"learning_rate": 6.999287989614971e-08,
"logits": -1.6679482460021973,
"logps": -86.46797180175781,
"loss": 0.0517,
"objective": 0.05193919688463211,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6291666626930237,
"regularize": 0.051939185708761215,
"step": 745
},
{
"dpo_loss": 0.5270615220069885,
"epoch": 4.251299008030231,
"grad_norm": 17.47564517685443,
"learning_rate": 6.501761650996052e-08,
"logits": -1.787042498588562,
"logps": -88.59746551513672,
"loss": 0.0521,
"objective": 0.061865612864494324,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6416666507720947,
"regularize": 0.06186559051275253,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.6843414902687073,
"eval_logits": -1.6900094747543335,
"eval_logps": -95.91879272460938,
"eval_loss": 0.4082893431186676,
"eval_objective": 0.41186439990997314,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.41186439990997314,
"eval_runtime": 260.1497,
"eval_samples_per_second": 22.256,
"eval_steps_per_second": 0.93,
"step": 750
},
{
"dpo_loss": 0.529739260673523,
"epoch": 4.2796410014171,
"grad_norm": 14.371086729219567,
"learning_rate": 6.021345186850418e-08,
"logits": -1.6882461309432983,
"logps": -88.36084747314453,
"loss": 0.0539,
"objective": 0.05107791721820831,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6541666388511658,
"regularize": 0.05107790231704712,
"step": 755
},
{
"dpo_loss": 0.5281350612640381,
"epoch": 4.307982994803968,
"grad_norm": 16.117421305971423,
"learning_rate": 5.5582275672538316e-08,
"logits": -1.6556658744812012,
"logps": -89.51905822753906,
"loss": 0.0458,
"objective": 0.04781101644039154,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6875,
"regularize": 0.04781101644039154,
"step": 760
},
{
"dpo_loss": 0.51561039686203,
"epoch": 4.336324988190836,
"grad_norm": 14.994145013136821,
"learning_rate": 5.112590957844232e-08,
"logits": -1.697762370109558,
"logps": -89.37593078613281,
"loss": 0.0528,
"objective": 0.054953668266534805,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.7250000238418579,
"regularize": 0.05495365336537361,
"step": 765
},
{
"dpo_loss": 0.5164503455162048,
"epoch": 4.364666981577704,
"grad_norm": 13.892500924387873,
"learning_rate": 4.684610648167503e-08,
"logits": -1.5592148303985596,
"logps": -87.5302734375,
"loss": 0.0516,
"objective": 0.04956016317009926,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6541666388511658,
"regularize": 0.049560148268938065,
"step": 770
},
{
"dpo_loss": 0.5293823480606079,
"epoch": 4.393008974964572,
"grad_norm": 14.429815473976188,
"learning_rate": 4.274454982728032e-08,
"logits": -1.614122986793518,
"logps": -88.37311553955078,
"loss": 0.05,
"objective": 0.05319143459200859,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6666666865348816,
"regularize": 0.053191423416137695,
"step": 775
},
{
"dpo_loss": 0.5353278517723083,
"epoch": 4.42135096835144,
"grad_norm": 14.827503239956233,
"learning_rate": 3.882285294770937e-08,
"logits": -1.673889398574829,
"logps": -86.99459075927734,
"loss": 0.0486,
"objective": 0.050799839198589325,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6333333253860474,
"regularize": 0.05079982802271843,
"step": 780
},
{
"dpo_loss": 0.5483137369155884,
"epoch": 4.449692961738309,
"grad_norm": 13.762782844472444,
"learning_rate": 3.508255842822255e-08,
"logits": -1.6981257200241089,
"logps": -87.26993560791016,
"loss": 0.0577,
"objective": 0.060258664190769196,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6333333253860474,
"regularize": 0.0602586455643177,
"step": 785
},
{
"dpo_loss": 0.5247166156768799,
"epoch": 4.478034955125177,
"grad_norm": 14.343748620962772,
"learning_rate": 3.15251375001192e-08,
"logits": -1.6197153329849243,
"logps": -89.92550659179688,
"loss": 0.0519,
"objective": 0.06482961773872375,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.7166666388511658,
"regularize": 0.06482960283756256,
"step": 790
},
{
"dpo_loss": 0.5297049880027771,
"epoch": 4.506376948512045,
"grad_norm": 14.241023462964922,
"learning_rate": 2.8151989462033787e-08,
"logits": -1.5452524423599243,
"logps": -89.73005676269531,
"loss": 0.0535,
"objective": 0.0436834916472435,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6499999761581421,
"regularize": 0.043683480471372604,
"step": 795
},
{
"dpo_loss": 0.5308266878128052,
"epoch": 4.534718941898913,
"grad_norm": 14.965902621487606,
"learning_rate": 2.4964441129527335e-08,
"logits": -1.5640093088150024,
"logps": -88.82685089111328,
"loss": 0.0529,
"objective": 0.056653544306755066,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6666666865348816,
"regularize": 0.05665354058146477,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.684349000453949,
"eval_logits": -1.6918096542358398,
"eval_logps": -95.80995178222656,
"eval_loss": 0.4081306457519531,
"eval_objective": 0.41187557578086853,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.41187557578086853,
"eval_runtime": 260.1251,
"eval_samples_per_second": 22.259,
"eval_steps_per_second": 0.93,
"step": 800
},
{
"dpo_loss": 0.5310518145561218,
"epoch": 4.563060935285781,
"grad_norm": 14.644913526257378,
"learning_rate": 2.1963746313188757e-08,
"logits": -1.612503170967102,
"logps": -87.55821990966797,
"loss": 0.0533,
"objective": 0.05946908891201019,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.637499988079071,
"regularize": 0.0594690665602684,
"step": 805
},
{
"dpo_loss": 0.5326514840126038,
"epoch": 4.59140292867265,
"grad_norm": 14.784289994959185,
"learning_rate": 1.915108532545351e-08,
"logits": -1.7028088569641113,
"logps": -87.26454162597656,
"loss": 0.0486,
"objective": 0.0378708690404892,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5874999761581421,
"regularize": 0.0378708578646183,
"step": 810
},
{
"dpo_loss": 0.5349629521369934,
"epoch": 4.619744922059518,
"grad_norm": 13.924074189602024,
"learning_rate": 1.6527564516331638e-08,
"logits": -1.5550504922866821,
"logps": -88.41017150878906,
"loss": 0.0474,
"objective": 0.03614622354507446,
"ranking_idealized": 0.7250000238418579,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.7208333611488342,
"regularize": 0.036146197468042374,
"step": 815
},
{
"dpo_loss": 0.5395488142967224,
"epoch": 4.648086915446386,
"grad_norm": 14.728879231266445,
"learning_rate": 1.4094215838229172e-08,
"logits": -1.6426359415054321,
"logps": -88.95872497558594,
"loss": 0.0497,
"objective": 0.04463572055101395,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6916666626930237,
"regularize": 0.044635698199272156,
"step": 820
},
{
"dpo_loss": 0.5434182286262512,
"epoch": 4.6764289088332545,
"grad_norm": 14.648978111763995,
"learning_rate": 1.1851996440033318e-08,
"logits": -1.5325695276260376,
"logps": -86.70441436767578,
"loss": 0.048,
"objective": 0.04530922695994377,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.612500011920929,
"regularize": 0.04530921205878258,
"step": 825
},
{
"dpo_loss": 0.5301134586334229,
"epoch": 4.7047709022201225,
"grad_norm": 14.659203003472166,
"learning_rate": 9.801788290621505e-09,
"logits": -1.7177317142486572,
"logps": -88.56446838378906,
"loss": 0.0537,
"objective": 0.0582742765545845,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.675000011920929,
"regularize": 0.05827426165342331,
"step": 830
},
{
"dpo_loss": 0.5137851238250732,
"epoch": 4.733112895606991,
"grad_norm": 14.143035439452712,
"learning_rate": 7.944397831941951e-09,
"logits": -1.6337097883224487,
"logps": -88.74520874023438,
"loss": 0.0469,
"objective": 0.046563997864723206,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6416666507720947,
"regularize": 0.04656398296356201,
"step": 835
},
{
"dpo_loss": 0.5074983835220337,
"epoch": 4.7614548889938595,
"grad_norm": 14.776976768313608,
"learning_rate": 6.280555661802856e-09,
"logits": -1.637603759765625,
"logps": -89.17638397216797,
"loss": 0.049,
"objective": 0.04939265549182892,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.675000011920929,
"regularize": 0.049392636865377426,
"step": 840
},
{
"dpo_loss": 0.5405561327934265,
"epoch": 4.7897968823807275,
"grad_norm": 14.924755507827577,
"learning_rate": 4.810916246494157e-09,
"logits": -1.6720634698867798,
"logps": -88.51549530029297,
"loss": 0.0508,
"objective": 0.048842187970876694,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.6291666626930237,
"regularize": 0.048842158168554306,
"step": 845
},
{
"dpo_loss": 0.5286487340927124,
"epoch": 4.818138875767596,
"grad_norm": 14.741530280772613,
"learning_rate": 3.5360576633558513e-09,
"logits": -1.604957938194275,
"logps": -87.53111267089844,
"loss": 0.0471,
"objective": 0.049042269587516785,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6708333492279053,
"regularize": 0.04904225841164589,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.6843511462211609,
"eval_logits": -1.6919739246368408,
"eval_logps": -95.9782485961914,
"eval_loss": 0.40833279490470886,
"eval_objective": 0.4120672345161438,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 0.4120672345161438,
"eval_runtime": 259.9887,
"eval_samples_per_second": 22.27,
"eval_steps_per_second": 0.931,
"step": 850
},
{
"dpo_loss": 0.5401243567466736,
"epoch": 4.846480869154464,
"grad_norm": 14.09171530539623,
"learning_rate": 2.4564813733932155e-09,
"logits": -1.6012241840362549,
"logps": -88.17009735107422,
"loss": 0.047,
"objective": 0.04625382646918297,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6916666626930237,
"regularize": 0.04625379666686058,
"step": 855
},
{
"dpo_loss": 0.5225604772567749,
"epoch": 4.874822862541333,
"grad_norm": 14.951871396423634,
"learning_rate": 1.5726120240288631e-09,
"logits": -1.5658340454101562,
"logps": -87.6793212890625,
"loss": 0.0434,
"objective": 0.04740705341100693,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.7041666507720947,
"regularize": 0.047407038509845734,
"step": 860
},
{
"dpo_loss": 0.5320844054222107,
"epoch": 4.903164855928201,
"grad_norm": 14.202693717147874,
"learning_rate": 8.847972820693051e-10,
"logits": -1.6407142877578735,
"logps": -86.8409652709961,
"loss": 0.0464,
"objective": 0.06202062591910362,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.637499988079071,
"regularize": 0.06202061474323273,
"step": 865
},
{
"dpo_loss": 0.5251496434211731,
"epoch": 4.931506849315069,
"grad_norm": 14.584157991733704,
"learning_rate": 3.933076969516724e-10,
"logits": -1.6990463733673096,
"logps": -88.78810119628906,
"loss": 0.0513,
"objective": 0.0480065755546093,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.675000011920929,
"regularize": 0.048006556928157806,
"step": 870
},
{
"dpo_loss": 0.5269615650177002,
"epoch": 4.959848842701937,
"grad_norm": 14.31816614460119,
"learning_rate": 9.833659432367803e-11,
"logits": -1.6176135540008545,
"logps": -88.72756958007812,
"loss": 0.046,
"objective": 0.035082168877124786,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6791666746139526,
"regularize": 0.03508211672306061,
"step": 875
},
{
"dpo_loss": 0.5263823866844177,
"epoch": 4.988190836088805,
"grad_norm": 14.834100494656957,
"learning_rate": 0.0,
"logits": -1.6788469552993774,
"logps": -89.02001953125,
"loss": 0.0442,
"objective": 0.03841705247759819,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6708333492279053,
"regularize": 0.03841704502701759,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.1521414952352643,
"train_runtime": 35252.4203,
"train_samples_per_second": 7.205,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}