hZzy's picture
Model save
0d3214a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 26.827196619905514,
"learning_rate": 5.681818181818182e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6930485367774963,
"epoch": 0.02834199338686821,
"grad_norm": 26.779096499602865,
"learning_rate": 2.840909090909091e-07,
"logits": -1.3680453300476074,
"logps": -84.26322937011719,
"loss": 0.4128,
"objective": 0.37547406554222107,
"ranking_idealized": 0.6145833134651184,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 0.37547406554222107,
"step": 5
},
{
"dpo_loss": 0.6873258948326111,
"epoch": 0.05668398677373642,
"grad_norm": 27.711550621025058,
"learning_rate": 5.681818181818182e-07,
"logits": -1.4466668367385864,
"logps": -82.5794448852539,
"loss": 0.4133,
"objective": 0.4332159161567688,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4332159161567688,
"step": 10
},
{
"dpo_loss": 0.6854580044746399,
"epoch": 0.08502598016060463,
"grad_norm": 26.55882910438415,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4248812198638916,
"logps": -82.44363403320312,
"loss": 0.4132,
"objective": 0.4001390337944031,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4001390337944031,
"step": 15
},
{
"dpo_loss": 0.6893234252929688,
"epoch": 0.11336797354747284,
"grad_norm": 28.16437068526933,
"learning_rate": 1.1363636363636364e-06,
"logits": -1.4341281652450562,
"logps": -83.77052307128906,
"loss": 0.3955,
"objective": 0.4052102267742157,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.4052102267742157,
"step": 20
},
{
"dpo_loss": 0.6645582914352417,
"epoch": 0.14170996693434104,
"grad_norm": 24.3242807722224,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.491589069366455,
"logps": -82.93748474121094,
"loss": 0.3885,
"objective": 0.40111032128334045,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5041666626930237,
"regularize": 0.40111032128334045,
"step": 25
},
{
"dpo_loss": 0.6675031185150146,
"epoch": 0.17005196032120926,
"grad_norm": 26.35319215355226,
"learning_rate": 1.7045454545454546e-06,
"logits": -1.4577081203460693,
"logps": -84.03182220458984,
"loss": 0.3752,
"objective": 0.382988303899765,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 0.382988303899765,
"step": 30
},
{
"dpo_loss": 0.6479320526123047,
"epoch": 0.19839395370807747,
"grad_norm": 27.102387091432373,
"learning_rate": 1.9886363636363638e-06,
"logits": -1.4457683563232422,
"logps": -82.55751037597656,
"loss": 0.3652,
"objective": 0.3455962538719177,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5375000238418579,
"regularize": 0.34559622406959534,
"step": 35
},
{
"dpo_loss": 0.6392762660980225,
"epoch": 0.22673594709494568,
"grad_norm": 27.66182181149778,
"learning_rate": 2.2727272727272728e-06,
"logits": -1.4526138305664062,
"logps": -82.46888732910156,
"loss": 0.3679,
"objective": 0.38397228717803955,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5375000238418579,
"regularize": 0.38397228717803955,
"step": 40
},
{
"dpo_loss": 0.6107492446899414,
"epoch": 0.25507794048181387,
"grad_norm": 24.020609030227188,
"learning_rate": 2.556818181818182e-06,
"logits": -1.5597164630889893,
"logps": -85.65888214111328,
"loss": 0.3553,
"objective": 0.40014997124671936,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5625,
"regularize": 0.40014997124671936,
"step": 45
},
{
"dpo_loss": 0.6240749359130859,
"epoch": 0.2834199338686821,
"grad_norm": 22.420393674285894,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.4485610723495483,
"logps": -86.837890625,
"loss": 0.3559,
"objective": 0.3479757606983185,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.4749999940395355,
"regularize": 0.3479757606983185,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6920796632766724,
"eval_logits": -1.4739316701889038,
"eval_logps": -96.0254135131836,
"eval_loss": 0.42298370599746704,
"eval_objective": 0.4318382143974304,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5185950398445129,
"eval_regularize": 0.4318382143974304,
"eval_runtime": 305.3308,
"eval_samples_per_second": 18.963,
"eval_steps_per_second": 0.793,
"step": 50
},
{
"dpo_loss": 0.632377564907074,
"epoch": 0.3117619272555503,
"grad_norm": 29.297505573115142,
"learning_rate": 3.125e-06,
"logits": -1.499748945236206,
"logps": -92.24181365966797,
"loss": 0.372,
"objective": 0.36379119753837585,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5625,
"regularize": 0.36379119753837585,
"step": 55
},
{
"dpo_loss": 0.6205580830574036,
"epoch": 0.3401039206424185,
"grad_norm": 32.111329845751825,
"learning_rate": 3.409090909090909e-06,
"logits": -1.4636166095733643,
"logps": -88.49433135986328,
"loss": 0.3756,
"objective": 0.3948574662208557,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5541666746139526,
"regularize": 0.3948574662208557,
"step": 60
},
{
"dpo_loss": 0.6158726811408997,
"epoch": 0.3684459140292867,
"grad_norm": 21.125780754509353,
"learning_rate": 3.6931818181818186e-06,
"logits": -1.4296706914901733,
"logps": -84.69975280761719,
"loss": 0.3512,
"objective": 0.34672078490257263,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5583333373069763,
"regularize": 0.34672078490257263,
"step": 65
},
{
"dpo_loss": 0.6078273057937622,
"epoch": 0.39678790741615494,
"grad_norm": 21.36738942621028,
"learning_rate": 3.9772727272727275e-06,
"logits": -1.4390350580215454,
"logps": -84.15774536132812,
"loss": 0.3661,
"objective": 0.3570030629634857,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5625,
"regularize": 0.3570030629634857,
"step": 70
},
{
"dpo_loss": 0.604975700378418,
"epoch": 0.42512990080302315,
"grad_norm": 20.27859101420223,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.3615275621414185,
"logps": -83.67790985107422,
"loss": 0.3562,
"objective": 0.38403797149658203,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5791666507720947,
"regularize": 0.38403797149658203,
"step": 75
},
{
"dpo_loss": 0.6048832535743713,
"epoch": 0.45347189418989137,
"grad_norm": 21.18230829994196,
"learning_rate": 4.5454545454545455e-06,
"logits": -1.368180513381958,
"logps": -81.03931427001953,
"loss": 0.3692,
"objective": 0.3890025019645691,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5583333373069763,
"regularize": 0.3890025019645691,
"step": 80
},
{
"dpo_loss": 0.575659990310669,
"epoch": 0.4818138875767596,
"grad_norm": 18.639373454342547,
"learning_rate": 4.829545454545455e-06,
"logits": -1.2571583986282349,
"logps": -80.53406524658203,
"loss": 0.3542,
"objective": 0.3442578613758087,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3442578613758087,
"step": 85
},
{
"dpo_loss": 0.5988019704818726,
"epoch": 0.5101558809636277,
"grad_norm": 19.215896145423606,
"learning_rate": 4.999921328558333e-06,
"logits": -1.0600625276565552,
"logps": -79.93087005615234,
"loss": 0.3716,
"objective": 0.3766815960407257,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5041666626930237,
"regularize": 0.3766815960407257,
"step": 90
},
{
"dpo_loss": 0.5648664832115173,
"epoch": 0.538497874350496,
"grad_norm": 18.301401671657857,
"learning_rate": 4.999036331701828e-06,
"logits": -1.1114833354949951,
"logps": -78.7803726196289,
"loss": 0.3787,
"objective": 0.3693276345729828,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5958333611488342,
"regularize": 0.3693276345729828,
"step": 95
},
{
"dpo_loss": 0.5575936436653137,
"epoch": 0.5668398677373642,
"grad_norm": 17.902415337800125,
"learning_rate": 4.997168347957521e-06,
"logits": -1.1674468517303467,
"logps": -80.26119995117188,
"loss": 0.3834,
"objective": 0.3862568736076355,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5916666388511658,
"regularize": 0.3862568736076355,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.695151686668396,
"eval_logits": -1.1784183979034424,
"eval_logps": -85.01744079589844,
"eval_loss": 0.48081931471824646,
"eval_objective": 0.4771096706390381,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5154958963394165,
"eval_regularize": 0.4771096706390381,
"eval_runtime": 306.5313,
"eval_samples_per_second": 18.889,
"eval_steps_per_second": 0.789,
"step": 100
},
{
"dpo_loss": 0.588794469833374,
"epoch": 0.5951818611242324,
"grad_norm": 18.044596123919455,
"learning_rate": 4.994318112090048e-06,
"logits": -1.0578794479370117,
"logps": -79.78172302246094,
"loss": 0.395,
"objective": 0.40164127945899963,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6000000238418579,
"regularize": 0.40164127945899963,
"step": 105
},
{
"dpo_loss": 0.5815469026565552,
"epoch": 0.6235238545111006,
"grad_norm": 21.50058623262647,
"learning_rate": 4.990486745229364e-06,
"logits": -1.2767162322998047,
"logps": -76.8266372680664,
"loss": 0.3976,
"objective": 0.4256088137626648,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.512499988079071,
"regularize": 0.4256088137626648,
"step": 110
},
{
"dpo_loss": 0.5960076451301575,
"epoch": 0.6518658478979689,
"grad_norm": 16.79094440634272,
"learning_rate": 4.985675754429744e-06,
"logits": -1.329805850982666,
"logps": -75.79194641113281,
"loss": 0.4067,
"objective": 0.4033397138118744,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5333333611488342,
"regularize": 0.4033397138118744,
"step": 115
},
{
"dpo_loss": 0.6170827746391296,
"epoch": 0.680207841284837,
"grad_norm": 18.793022818980457,
"learning_rate": 4.9798870320769884e-06,
"logits": -1.1681851148605347,
"logps": -76.48377990722656,
"loss": 0.4066,
"objective": 0.36593717336654663,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5625,
"regularize": 0.36593717336654663,
"step": 120
},
{
"dpo_loss": 0.5946592092514038,
"epoch": 0.7085498346717053,
"grad_norm": 17.05224950606184,
"learning_rate": 4.973122855144066e-06,
"logits": -1.0615798234939575,
"logps": -75.89274597167969,
"loss": 0.3915,
"objective": 0.3765077292919159,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3765077292919159,
"step": 125
},
{
"dpo_loss": 0.6107650399208069,
"epoch": 0.7368918280585735,
"grad_norm": 15.723039306234742,
"learning_rate": 4.965385884295467e-06,
"logits": -1.1680763959884644,
"logps": -74.33956909179688,
"loss": 0.393,
"objective": 0.39573410153388977,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.5291666388511658,
"regularize": 0.39573410153388977,
"step": 130
},
{
"dpo_loss": 0.5728858709335327,
"epoch": 0.7652338214454416,
"grad_norm": 15.626255395375802,
"learning_rate": 4.956679162840646e-06,
"logits": -1.1485530138015747,
"logps": -75.2200698852539,
"loss": 0.3823,
"objective": 0.36154037714004517,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5458333492279053,
"regularize": 0.36154037714004517,
"step": 135
},
{
"dpo_loss": 0.5767256617546082,
"epoch": 0.7935758148323099,
"grad_norm": 15.232853103825379,
"learning_rate": 4.947006115536947e-06,
"logits": -0.9859077334403992,
"logps": -75.89881896972656,
"loss": 0.3873,
"objective": 0.3745146691799164,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5625,
"regularize": 0.3745146691799164,
"step": 140
},
{
"dpo_loss": 0.5821123123168945,
"epoch": 0.821917808219178,
"grad_norm": 15.85191878128928,
"learning_rate": 4.9363705472424825e-06,
"logits": -0.9272752404212952,
"logps": -76.59304809570312,
"loss": 0.3707,
"objective": 0.3570760488510132,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5375000238418579,
"regularize": 0.3570760488510132,
"step": 145
},
{
"dpo_loss": 0.5737297534942627,
"epoch": 0.8502598016060463,
"grad_norm": 16.000525131960934,
"learning_rate": 4.924776641419513e-06,
"logits": -0.9384148120880127,
"logps": -74.83208465576172,
"loss": 0.3746,
"objective": 0.36504948139190674,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6000000238418579,
"regularize": 0.36504948139190674,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.7075234055519104,
"eval_logits": -1.0147572755813599,
"eval_logps": -81.63130950927734,
"eval_loss": 0.5244768261909485,
"eval_objective": 0.5243399143218994,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5165289044380188,
"eval_regularize": 0.5243399143218994,
"eval_runtime": 304.8522,
"eval_samples_per_second": 18.993,
"eval_steps_per_second": 0.794,
"step": 150
},
{
"dpo_loss": 0.5900284051895142,
"epoch": 0.8786017949929145,
"grad_norm": 16.142319382249244,
"learning_rate": 4.9122289584888926e-06,
"logits": -0.9796825051307678,
"logps": -73.79574584960938,
"loss": 0.3743,
"objective": 0.36068999767303467,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 0.36068999767303467,
"step": 155
},
{
"dpo_loss": 0.581436812877655,
"epoch": 0.9069437883797827,
"grad_norm": 14.536280492665549,
"learning_rate": 4.8987324340362445e-06,
"logits": -0.9089191555976868,
"logps": -74.14305877685547,
"loss": 0.3561,
"objective": 0.3342379629611969,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5791666507720947,
"regularize": 0.3342379629611969,
"step": 160
},
{
"dpo_loss": 0.5954117178916931,
"epoch": 0.9352857817666509,
"grad_norm": 14.701365211451375,
"learning_rate": 4.884292376870567e-06,
"logits": -0.9048901796340942,
"logps": -74.482666015625,
"loss": 0.3603,
"objective": 0.3528175950050354,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.574999988079071,
"regularize": 0.3528175950050354,
"step": 165
},
{
"dpo_loss": 0.5802183747291565,
"epoch": 0.9636277751535192,
"grad_norm": 14.493481098965965,
"learning_rate": 4.868914466936038e-06,
"logits": -0.9519513249397278,
"logps": -73.6054916381836,
"loss": 0.36,
"objective": 0.3644227087497711,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5583333373069763,
"regularize": 0.3644227087497711,
"step": 170
},
{
"dpo_loss": 0.5715630054473877,
"epoch": 0.9919697685403873,
"grad_norm": 14.890486278379207,
"learning_rate": 4.8526047530778175e-06,
"logits": -0.9974517822265625,
"logps": -72.90505981445312,
"loss": 0.3487,
"objective": 0.33891138434410095,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6333333253860474,
"regularize": 0.33891138434410095,
"step": 175
},
{
"dpo_loss": 0.5492505431175232,
"epoch": 1.0203117619272555,
"grad_norm": 15.405845532960777,
"learning_rate": 4.835369650662767e-06,
"logits": -1.026891827583313,
"logps": -73.60990142822266,
"loss": 0.3572,
"objective": 0.37355440855026245,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6166666746139526,
"regularize": 0.37355440855026245,
"step": 180
},
{
"dpo_loss": 0.5769887566566467,
"epoch": 1.0486537553141237,
"grad_norm": 14.375308639465139,
"learning_rate": 4.817215939055984e-06,
"logits": -0.8692090511322021,
"logps": -74.66992950439453,
"loss": 0.3467,
"objective": 0.3649583160877228,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5625,
"regularize": 0.3649583160877228,
"step": 185
},
{
"dpo_loss": 0.5738885998725891,
"epoch": 1.076995748700992,
"grad_norm": 14.51613583082745,
"learning_rate": 4.798150758954164e-06,
"logits": -0.9525903463363647,
"logps": -77.0860824584961,
"loss": 0.326,
"objective": 0.28388112783432007,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.28388112783432007,
"step": 190
},
{
"dpo_loss": 0.5714147090911865,
"epoch": 1.10533774208786,
"grad_norm": 14.487338275298626,
"learning_rate": 4.778181609576832e-06,
"logits": -1.04482901096344,
"logps": -76.55809020996094,
"loss": 0.3359,
"objective": 0.3480500280857086,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.625,
"regularize": 0.3480500280857086,
"step": 195
},
{
"dpo_loss": 0.5680884122848511,
"epoch": 1.1336797354747283,
"grad_norm": 13.467634775423322,
"learning_rate": 4.757316345716554e-06,
"logits": -1.0929234027862549,
"logps": -74.94612121582031,
"loss": 0.3365,
"objective": 0.3373413681983948,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5916666388511658,
"regularize": 0.3373413681983948,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.7045410871505737,
"eval_logits": -1.081216812133789,
"eval_logps": -80.30853271484375,
"eval_loss": 0.5509735941886902,
"eval_objective": 0.5435125827789307,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 0.5435125827789307,
"eval_runtime": 306.4337,
"eval_samples_per_second": 18.895,
"eval_steps_per_second": 0.79,
"step": 200
},
{
"dpo_loss": 0.5709416270256042,
"epoch": 1.1620217288615966,
"grad_norm": 14.362316497314021,
"learning_rate": 4.735563174649278e-06,
"logits": -0.9582468867301941,
"logps": -76.56965637207031,
"loss": 0.3235,
"objective": 0.3568976819515228,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5541666746139526,
"regularize": 0.3568976819515228,
"step": 205
},
{
"dpo_loss": 0.5406769514083862,
"epoch": 1.1903637222484649,
"grad_norm": 14.379661109842203,
"learning_rate": 4.7129306529060415e-06,
"logits": -0.8170334696769714,
"logps": -77.32323455810547,
"loss": 0.3299,
"objective": 0.32689639925956726,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5708333253860474,
"regularize": 0.32689639925956726,
"step": 210
},
{
"dpo_loss": 0.579511284828186,
"epoch": 1.2187057156353331,
"grad_norm": 13.661507608183136,
"learning_rate": 4.68942768290728e-06,
"logits": -0.7792264223098755,
"logps": -76.88572692871094,
"loss": 0.3051,
"objective": 0.3139689862728119,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5708333253860474,
"regularize": 0.3139689862728119,
"step": 215
},
{
"dpo_loss": 0.5531005263328552,
"epoch": 1.2470477090222012,
"grad_norm": 13.52084314453953,
"learning_rate": 4.665063509461098e-06,
"logits": -0.6951576471328735,
"logps": -75.48503875732422,
"loss": 0.3088,
"objective": 0.29014942049980164,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.6583333611488342,
"regularize": 0.29014942049980164,
"step": 220
},
{
"dpo_loss": 0.5622245073318481,
"epoch": 1.2753897024090695,
"grad_norm": 13.599594203183662,
"learning_rate": 4.639847716126855e-06,
"logits": -0.8319641351699829,
"logps": -76.13066101074219,
"loss": 0.3046,
"objective": 0.2946093678474426,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6166666746139526,
"regularize": 0.2946093678474426,
"step": 225
},
{
"dpo_loss": 0.5498285889625549,
"epoch": 1.3037316957959377,
"grad_norm": 13.631491302105779,
"learning_rate": 4.613790221445511e-06,
"logits": -0.8351464867591858,
"logps": -76.21858978271484,
"loss": 0.2907,
"objective": 0.2966916263103485,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2966916263103485,
"step": 230
},
{
"dpo_loss": 0.5353298187255859,
"epoch": 1.3320736891828058,
"grad_norm": 13.968269928794491,
"learning_rate": 4.586901275038201e-06,
"logits": -0.9569566249847412,
"logps": -73.50439453125,
"loss": 0.2932,
"objective": 0.282253235578537,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5916666388511658,
"regularize": 0.282253235578537,
"step": 235
},
{
"dpo_loss": 0.5563216805458069,
"epoch": 1.360415682569674,
"grad_norm": 13.29819869933344,
"learning_rate": 4.559191453574582e-06,
"logits": -0.8645553588867188,
"logps": -75.53768920898438,
"loss": 0.2903,
"objective": 0.3077048361301422,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3077048361301422,
"step": 240
},
{
"dpo_loss": 0.5537334084510803,
"epoch": 1.3887576759565423,
"grad_norm": 12.875135429729372,
"learning_rate": 4.530671656612544e-06,
"logits": -0.850976824760437,
"logps": -74.48047637939453,
"loss": 0.2813,
"objective": 0.28993913531303406,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5708333253860474,
"regularize": 0.28993913531303406,
"step": 245
},
{
"dpo_loss": 0.5563530325889587,
"epoch": 1.4170996693434104,
"grad_norm": 13.133098253949312,
"learning_rate": 4.501353102310901e-06,
"logits": -0.876754879951477,
"logps": -74.63734436035156,
"loss": 0.2986,
"objective": 0.31398385763168335,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5625,
"regularize": 0.31398385763168335,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7114357352256775,
"eval_logits": -0.9739663004875183,
"eval_logps": -79.86080932617188,
"eval_loss": 0.5600165128707886,
"eval_objective": 0.5589736700057983,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.5589736700057983,
"eval_runtime": 311.8199,
"eval_samples_per_second": 18.568,
"eval_steps_per_second": 0.776,
"step": 250
},
{
"dpo_loss": 0.5523940324783325,
"epoch": 1.4454416627302786,
"grad_norm": 13.234580872068834,
"learning_rate": 4.4712473230167775e-06,
"logits": -0.8489291071891785,
"logps": -75.83350372314453,
"loss": 0.2822,
"objective": 0.26451078057289124,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.625,
"regularize": 0.26451075077056885,
"step": 255
},
{
"dpo_loss": 0.5551812052726746,
"epoch": 1.473783656117147,
"grad_norm": 13.300182649317502,
"learning_rate": 4.440366160729393e-06,
"logits": -0.7993748784065247,
"logps": -76.90797424316406,
"loss": 0.2899,
"objective": 0.3137137293815613,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5541666746139526,
"regularize": 0.3137137293815613,
"step": 260
},
{
"dpo_loss": 0.5330025553703308,
"epoch": 1.5021256495040152,
"grad_norm": 14.207525692908433,
"learning_rate": 4.4087217624420595e-06,
"logits": -0.7082098722457886,
"logps": -76.11457824707031,
"loss": 0.2846,
"objective": 0.25817468762397766,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5916666388511658,
"regularize": 0.25817468762397766,
"step": 265
},
{
"dpo_loss": 0.5525502562522888,
"epoch": 1.5304676428908834,
"grad_norm": 12.70772023655561,
"learning_rate": 4.376326575364206e-06,
"logits": -0.7001979947090149,
"logps": -76.76708221435547,
"loss": 0.2772,
"objective": 0.2816409468650818,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5666666626930237,
"regularize": 0.2816409468650818,
"step": 270
},
{
"dpo_loss": 0.5756404399871826,
"epoch": 1.5588096362777515,
"grad_norm": 12.63088418395231,
"learning_rate": 4.34319334202531e-06,
"logits": -0.7429192066192627,
"logps": -76.83454132080078,
"loss": 0.2784,
"objective": 0.2739499509334564,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.2739499509334564,
"step": 275
},
{
"dpo_loss": 0.5417617559432983,
"epoch": 1.5871516296646198,
"grad_norm": 12.825009100783165,
"learning_rate": 4.309335095262675e-06,
"logits": -0.7624490261077881,
"logps": -73.92798614501953,
"loss": 0.2686,
"objective": 0.2632053792476654,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2632053792476654,
"step": 280
},
{
"dpo_loss": 0.5646030306816101,
"epoch": 1.615493623051488,
"grad_norm": 13.21737932177293,
"learning_rate": 4.274765153095008e-06,
"logits": -0.8555312752723694,
"logps": -74.2479019165039,
"loss": 0.2734,
"objective": 0.28485408425331116,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5958333611488342,
"regularize": 0.28485408425331116,
"step": 285
},
{
"dpo_loss": 0.5464602112770081,
"epoch": 1.643835616438356,
"grad_norm": 12.446912499924908,
"learning_rate": 4.239497113483819e-06,
"logits": -0.8550429940223694,
"logps": -72.50788879394531,
"loss": 0.2488,
"objective": 0.24861304461956024,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6416666507720947,
"regularize": 0.24861304461956024,
"step": 290
},
{
"dpo_loss": 0.5289739370346069,
"epoch": 1.6721776098252243,
"grad_norm": 12.298985319390749,
"learning_rate": 4.203544848984729e-06,
"logits": -0.7495914101600647,
"logps": -71.680908203125,
"loss": 0.2614,
"objective": 0.24754279851913452,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.24754279851913452,
"step": 295
},
{
"dpo_loss": 0.5397422909736633,
"epoch": 1.7005196032120926,
"grad_norm": 12.732726818745434,
"learning_rate": 4.16692250129073e-06,
"logits": -0.7598840594291687,
"logps": -72.7435531616211,
"loss": 0.2571,
"objective": 0.26093509793281555,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.637499988079071,
"regularize": 0.26093509793281555,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.714857280254364,
"eval_logits": -0.802342414855957,
"eval_logps": -77.65937042236328,
"eval_loss": 0.5773638486862183,
"eval_objective": 0.5723886489868164,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.5723886489868164,
"eval_runtime": 310.9204,
"eval_samples_per_second": 18.622,
"eval_steps_per_second": 0.778,
"step": 300
},
{
"dpo_loss": 0.544061541557312,
"epoch": 1.7288615965989607,
"grad_norm": 13.110616566501927,
"learning_rate": 4.129644475669617e-06,
"logits": -0.7123271822929382,
"logps": -74.22494506835938,
"loss": 0.2603,
"objective": 0.26340511441230774,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5625,
"regularize": 0.26340508460998535,
"step": 305
},
{
"dpo_loss": 0.5413281917572021,
"epoch": 1.7572035899858292,
"grad_norm": 12.898498535434477,
"learning_rate": 4.091725435297721e-06,
"logits": -0.7656295895576477,
"logps": -73.33552551269531,
"loss": 0.2607,
"objective": 0.24210144579410553,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5958333611488342,
"regularize": 0.24210144579410553,
"step": 310
},
{
"dpo_loss": 0.5535203218460083,
"epoch": 1.7855455833726972,
"grad_norm": 13.419557597255025,
"learning_rate": 4.053180295492203e-06,
"logits": -0.7532054781913757,
"logps": -74.6802749633789,
"loss": 0.2485,
"objective": 0.2460954487323761,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5791666507720947,
"regularize": 0.2460954487323761,
"step": 315
},
{
"dpo_loss": 0.5523297190666199,
"epoch": 1.8138875767595655,
"grad_norm": 12.79380807435913,
"learning_rate": 4.014024217844167e-06,
"logits": -0.8301442861557007,
"logps": -76.51591491699219,
"loss": 0.2635,
"objective": 0.3148113191127777,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3148112893104553,
"step": 320
},
{
"dpo_loss": 0.532356858253479,
"epoch": 1.8422295701464337,
"grad_norm": 13.365547750277852,
"learning_rate": 3.974272604254906e-06,
"logits": -0.8250628709793091,
"logps": -77.59033203125,
"loss": 0.2519,
"objective": 0.25422483682632446,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6208333373069763,
"regularize": 0.25422483682632446,
"step": 325
},
{
"dpo_loss": 0.5476362705230713,
"epoch": 1.8705715635333018,
"grad_norm": 12.802067588796056,
"learning_rate": 3.933941090877615e-06,
"logits": -0.6167819499969482,
"logps": -74.30210876464844,
"loss": 0.249,
"objective": 0.23240336775779724,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5541666746139526,
"regularize": 0.23240336775779724,
"step": 330
},
{
"dpo_loss": 0.5612020492553711,
"epoch": 1.89891355692017,
"grad_norm": 12.79328820625046,
"learning_rate": 3.893045541966975e-06,
"logits": -0.6927453875541687,
"logps": -73.87529754638672,
"loss": 0.2434,
"objective": 0.23465226590633392,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5874999761581421,
"regularize": 0.23465226590633392,
"step": 335
},
{
"dpo_loss": 0.5674837231636047,
"epoch": 1.9272555503070383,
"grad_norm": 13.023100915665985,
"learning_rate": 3.8516020436389945e-06,
"logits": -0.6162213683128357,
"logps": -76.17884063720703,
"loss": 0.2412,
"objective": 0.2584584355354309,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6291666626930237,
"regularize": 0.2584584355354309,
"step": 340
},
{
"dpo_loss": 0.5505400896072388,
"epoch": 1.9555975436939064,
"grad_norm": 12.686766547892125,
"learning_rate": 3.8096268975436045e-06,
"logits": -0.634881854057312,
"logps": -74.6928939819336,
"loss": 0.227,
"objective": 0.23832601308822632,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5666666626930237,
"regularize": 0.23832601308822632,
"step": 345
},
{
"dpo_loss": 0.562423825263977,
"epoch": 1.9839395370807746,
"grad_norm": 13.255584593626184,
"learning_rate": 3.767136614452458e-06,
"logits": -0.5817281603813171,
"logps": -75.97590637207031,
"loss": 0.2355,
"objective": 0.23991867899894714,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5791666507720947,
"regularize": 0.23991867899894714,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.7175589203834534,
"eval_logits": -0.7278391122817993,
"eval_logps": -79.45545196533203,
"eval_loss": 0.5796564817428589,
"eval_objective": 0.5735844373703003,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5185950398445129,
"eval_regularize": 0.5735844373703003,
"eval_runtime": 307.9093,
"eval_samples_per_second": 18.804,
"eval_steps_per_second": 0.786,
"step": 350
},
{
"dpo_loss": 0.5313494801521301,
"epoch": 2.012281530467643,
"grad_norm": 12.907638515771302,
"learning_rate": 3.724147907764478e-06,
"logits": -0.5674468874931335,
"logps": -75.11559295654297,
"loss": 0.2316,
"objective": 0.23825272917747498,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5916666388511658,
"regularize": 0.23825272917747498,
"step": 355
},
{
"dpo_loss": 0.5382583737373352,
"epoch": 2.040623523854511,
"grad_norm": 13.693755833374205,
"learning_rate": 3.6806776869317074e-06,
"logits": -0.6632164120674133,
"logps": -74.61618041992188,
"loss": 0.2199,
"objective": 0.2271762192249298,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6208333373069763,
"regularize": 0.2271762192249298,
"step": 360
},
{
"dpo_loss": 0.547314465045929,
"epoch": 2.0689655172413794,
"grad_norm": 12.48173826929469,
"learning_rate": 3.6367430508080283e-06,
"logits": -0.7491353154182434,
"logps": -77.67138671875,
"loss": 0.2218,
"objective": 0.2287120372056961,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6000000238418579,
"regularize": 0.2287120372056961,
"step": 365
},
{
"dpo_loss": 0.5396644473075867,
"epoch": 2.0973075106282475,
"grad_norm": 12.712152014920445,
"learning_rate": 3.5923612809233987e-06,
"logits": -0.6872729063034058,
"logps": -75.26972961425781,
"loss": 0.2158,
"objective": 0.20656557381153107,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5625,
"regularize": 0.20656557381153107,
"step": 370
},
{
"dpo_loss": 0.5337859988212585,
"epoch": 2.1256495040151155,
"grad_norm": 12.21493703143298,
"learning_rate": 3.547549834686222e-06,
"logits": -0.732543408870697,
"logps": -76.9386978149414,
"loss": 0.222,
"objective": 0.23369713127613068,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6583333611488342,
"regularize": 0.23369713127613068,
"step": 375
},
{
"dpo_loss": 0.5434145331382751,
"epoch": 2.153991497401984,
"grad_norm": 12.412606280858217,
"learning_rate": 3.5023263385165346e-06,
"logits": -0.6855267286300659,
"logps": -76.3364486694336,
"loss": 0.2038,
"objective": 0.21526171267032623,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5874999761581421,
"regularize": 0.21526171267032623,
"step": 380
},
{
"dpo_loss": 0.5589515566825867,
"epoch": 2.182333490788852,
"grad_norm": 13.046894193039002,
"learning_rate": 3.4567085809127247e-06,
"logits": -0.6990526914596558,
"logps": -78.23905944824219,
"loss": 0.2127,
"objective": 0.22517748177051544,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.574999988079071,
"regularize": 0.22517748177051544,
"step": 385
},
{
"dpo_loss": 0.5659069418907166,
"epoch": 2.21067548417572,
"grad_norm": 13.492951762790497,
"learning_rate": 3.410714505454486e-06,
"logits": -0.6352434754371643,
"logps": -76.46235656738281,
"loss": 0.2106,
"objective": 0.2098178267478943,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5249999761581421,
"regularize": 0.2098177969455719,
"step": 390
},
{
"dpo_loss": 0.520107626914978,
"epoch": 2.2390174775625886,
"grad_norm": 12.062788566128349,
"learning_rate": 3.364362203744777e-06,
"logits": -0.6107616424560547,
"logps": -78.00432586669922,
"loss": 0.2145,
"objective": 0.21819378435611725,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5916666388511658,
"regularize": 0.21819378435611725,
"step": 395
},
{
"dpo_loss": 0.5222891569137573,
"epoch": 2.2673594709494567,
"grad_norm": 13.243434332838024,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.6634539365768433,
"logps": -78.29851531982422,
"loss": 0.1974,
"objective": 0.19920341670513153,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6499999761581421,
"regularize": 0.19920340180397034,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.715599775314331,
"eval_logits": -0.7596362233161926,
"eval_logps": -81.36702728271484,
"eval_loss": 0.5802313685417175,
"eval_objective": 0.5785107612609863,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5278925895690918,
"eval_regularize": 0.5785107612609863,
"eval_runtime": 305.2997,
"eval_samples_per_second": 18.965,
"eval_steps_per_second": 0.793,
"step": 400
},
{
"dpo_loss": 0.5360192060470581,
"epoch": 2.295701464336325,
"grad_norm": 12.435630926562606,
"learning_rate": 3.2706559853460818e-06,
"logits": -0.7467118501663208,
"logps": -76.52106475830078,
"loss": 0.1969,
"objective": 0.19979530572891235,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.637499988079071,
"regularize": 0.19979530572891235,
"step": 405
},
{
"dpo_loss": 0.5409468412399292,
"epoch": 2.324043457723193,
"grad_norm": 11.990944827281423,
"learning_rate": 3.2233389276586325e-06,
"logits": -0.6921295523643494,
"logps": -76.02128601074219,
"loss": 0.1889,
"objective": 0.18640001118183136,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5666666626930237,
"regularize": 0.18640001118183136,
"step": 410
},
{
"dpo_loss": 0.5444414615631104,
"epoch": 2.3523854511100613,
"grad_norm": 12.656500632614845,
"learning_rate": 3.1757373472244324e-06,
"logits": -0.6981016993522644,
"logps": -76.54192352294922,
"loss": 0.191,
"objective": 0.19465957581996918,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5791666507720947,
"regularize": 0.19465957581996918,
"step": 415
},
{
"dpo_loss": 0.5401104092597961,
"epoch": 2.3807274444969297,
"grad_norm": 11.869598225301196,
"learning_rate": 3.127869967952698e-06,
"logits": -0.6331411004066467,
"logps": -77.9117202758789,
"loss": 0.1932,
"objective": 0.19117851555347443,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5625,
"regularize": 0.19117851555347443,
"step": 420
},
{
"dpo_loss": 0.5331805944442749,
"epoch": 2.409069437883798,
"grad_norm": 12.370377424025024,
"learning_rate": 3.0797556183036582e-06,
"logits": -0.659760057926178,
"logps": -77.06755065917969,
"loss": 0.1996,
"objective": 0.20076175034046173,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.20076175034046173,
"step": 425
},
{
"dpo_loss": 0.5397772192955017,
"epoch": 2.4374114312706663,
"grad_norm": 11.643686599023107,
"learning_rate": 3.0314132238824416e-06,
"logits": -0.6464790105819702,
"logps": -77.9500503540039,
"loss": 0.1919,
"objective": 0.19164572656154633,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6000000238418579,
"regularize": 0.19164572656154633,
"step": 430
},
{
"dpo_loss": 0.5243253111839294,
"epoch": 2.4657534246575343,
"grad_norm": 11.712683298007569,
"learning_rate": 2.9828617999947647e-06,
"logits": -0.6973146796226501,
"logps": -76.83717346191406,
"loss": 0.1917,
"objective": 0.20314709842205048,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5958333611488342,
"regularize": 0.20314709842205048,
"step": 435
},
{
"dpo_loss": 0.5402427315711975,
"epoch": 2.4940954180444024,
"grad_norm": 12.183847925432286,
"learning_rate": 2.9341204441673267e-06,
"logits": -0.6815401315689087,
"logps": -76.1678695678711,
"loss": 0.1838,
"objective": 0.17876936495304108,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5791666507720947,
"regularize": 0.1787693351507187,
"step": 440
},
{
"dpo_loss": 0.5401654839515686,
"epoch": 2.5224374114312704,
"grad_norm": 11.596641440144525,
"learning_rate": 2.8852083286358647e-06,
"logits": -0.6953208446502686,
"logps": -74.04133605957031,
"loss": 0.1837,
"objective": 0.17748965322971344,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6041666865348816,
"regularize": 0.17748965322971344,
"step": 445
},
{
"dpo_loss": 0.5587974786758423,
"epoch": 2.550779404818139,
"grad_norm": 12.363231902921715,
"learning_rate": 2.8361446928038298e-06,
"logits": -0.7141574621200562,
"logps": -76.00529479980469,
"loss": 0.1787,
"objective": 0.17918486893177032,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5416666865348816,
"regularize": 0.17918486893177032,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.716120183467865,
"eval_logits": -0.7106173038482666,
"eval_logps": -80.80033111572266,
"eval_loss": 0.5830134749412537,
"eval_objective": 0.5798959732055664,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.5798959732055664,
"eval_runtime": 310.4005,
"eval_samples_per_second": 18.653,
"eval_steps_per_second": 0.78,
"step": 450
},
{
"dpo_loss": 0.5358783006668091,
"epoch": 2.579121398205007,
"grad_norm": 12.672287662194814,
"learning_rate": 2.7869488356746344e-06,
"logits": -0.651078462600708,
"logps": -76.96314239501953,
"loss": 0.1798,
"objective": 0.18865393102169037,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5958333611488342,
"regularize": 0.18865393102169037,
"step": 455
},
{
"dpo_loss": 0.525675356388092,
"epoch": 2.6074633915918755,
"grad_norm": 11.750098983866883,
"learning_rate": 2.7376401082604563e-06,
"logits": -0.6694878339767456,
"logps": -76.94908905029297,
"loss": 0.1726,
"objective": 0.17671962082386017,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6083333492279053,
"regularize": 0.17671962082386017,
"step": 460
},
{
"dpo_loss": 0.5402486324310303,
"epoch": 2.6358053849787435,
"grad_norm": 11.832013815001902,
"learning_rate": 2.6882379059705953e-06,
"logits": -0.5349324345588684,
"logps": -77.00886535644531,
"loss": 0.1673,
"objective": 0.15668810904026031,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5541666746139526,
"regularize": 0.15668810904026031,
"step": 465
},
{
"dpo_loss": 0.5375062823295593,
"epoch": 2.6641473783656116,
"grad_norm": 12.335063217908251,
"learning_rate": 2.6387616609823506e-06,
"logits": -0.5760343670845032,
"logps": -76.343994140625,
"loss": 0.1727,
"objective": 0.15891402959823608,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5958333611488342,
"regularize": 0.15891402959823608,
"step": 470
},
{
"dpo_loss": 0.5359907150268555,
"epoch": 2.69248937175248,
"grad_norm": 12.267574887608895,
"learning_rate": 2.5892308345974517e-06,
"logits": -0.6134538054466248,
"logps": -75.63439178466797,
"loss": 0.1674,
"objective": 0.15188990533351898,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6083333492279053,
"regularize": 0.15188990533351898,
"step": 475
},
{
"dpo_loss": 0.5327687859535217,
"epoch": 2.720831365139348,
"grad_norm": 12.442899229326,
"learning_rate": 2.53966490958702e-06,
"logits": -0.6768204569816589,
"logps": -74.73170471191406,
"loss": 0.1658,
"objective": 0.1598203480243683,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5916666388511658,
"regularize": 0.1598203480243683,
"step": 480
},
{
"dpo_loss": 0.5352267622947693,
"epoch": 2.7491733585262166,
"grad_norm": 11.94503762472298,
"learning_rate": 2.490083382528097e-06,
"logits": -0.6315467357635498,
"logps": -77.08844757080078,
"loss": 0.1607,
"objective": 0.14373879134655,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.574999988079071,
"regularize": 0.14373879134655,
"step": 485
},
{
"dpo_loss": 0.5155076384544373,
"epoch": 2.7775153519130846,
"grad_norm": 11.88965868362973,
"learning_rate": 2.440505756134732e-06,
"logits": -0.588857889175415,
"logps": -75.61197662353516,
"loss": 0.1625,
"objective": 0.1591319590806961,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.574999988079071,
"regularize": 0.1591319590806961,
"step": 490
},
{
"dpo_loss": 0.5332462191581726,
"epoch": 2.8058573452999527,
"grad_norm": 12.56886938049473,
"learning_rate": 2.3909515315866606e-06,
"logits": -0.6140356063842773,
"logps": -74.00759887695312,
"loss": 0.1536,
"objective": 0.14011286199092865,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.14011286199092865,
"step": 495
},
{
"dpo_loss": 0.5316342711448669,
"epoch": 2.8341993386868207,
"grad_norm": 13.210636360710986,
"learning_rate": 2.341440200858589e-06,
"logits": -0.6548908352851868,
"logps": -73.46178436279297,
"loss": 0.1582,
"objective": 0.16330239176750183,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.16330239176750183,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.7177305817604065,
"eval_logits": -0.7271575927734375,
"eval_logps": -80.30960845947266,
"eval_loss": 0.5835670828819275,
"eval_objective": 0.5800158977508545,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5175619721412659,
"eval_regularize": 0.5800158977508545,
"eval_runtime": 305.7617,
"eval_samples_per_second": 18.936,
"eval_steps_per_second": 0.791,
"step": 500
},
{
"dpo_loss": 0.5332918763160706,
"epoch": 2.862541332073689,
"grad_norm": 12.029588140120659,
"learning_rate": 2.2919912390530945e-06,
"logits": -0.6230663061141968,
"logps": -75.35047912597656,
"loss": 0.1537,
"objective": 0.1514635533094406,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 0.1514635533094406,
"step": 505
},
{
"dpo_loss": 0.5396894216537476,
"epoch": 2.8908833254605573,
"grad_norm": 12.137559628481368,
"learning_rate": 2.242624096740164e-06,
"logits": -0.5799409747123718,
"logps": -76.0989761352539,
"loss": 0.1479,
"objective": 0.135562464594841,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.135562464594841,
"step": 510
},
{
"dpo_loss": 0.5431153178215027,
"epoch": 2.9192253188474258,
"grad_norm": 12.661269374784712,
"learning_rate": 2.193358192306384e-06,
"logits": -0.6524708867073059,
"logps": -75.5029067993164,
"loss": 0.1493,
"objective": 0.15468096733093262,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5708333253860474,
"regularize": 0.15468096733093262,
"step": 515
},
{
"dpo_loss": 0.5469124913215637,
"epoch": 2.947567312234294,
"grad_norm": 11.884820373737863,
"learning_rate": 2.1442129043167877e-06,
"logits": -0.599845290184021,
"logps": -77.28815460205078,
"loss": 0.1425,
"objective": 0.1461026966571808,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5916666388511658,
"regularize": 0.1461026966571808,
"step": 520
},
{
"dpo_loss": 0.524174690246582,
"epoch": 2.975909305621162,
"grad_norm": 12.08259988276891,
"learning_rate": 2.0952075638923656e-06,
"logits": -0.5986518859863281,
"logps": -76.06282806396484,
"loss": 0.1444,
"objective": 0.14290857315063477,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6333333253860474,
"regularize": 0.14290857315063477,
"step": 525
},
{
"dpo_loss": 0.5151727199554443,
"epoch": 3.0042512990080303,
"grad_norm": 12.395048106432046,
"learning_rate": 2.046361447106244e-06,
"logits": -0.6094143986701965,
"logps": -75.57567596435547,
"loss": 0.1418,
"objective": 0.14209994673728943,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 0.14209994673728943,
"step": 530
},
{
"dpo_loss": 0.5413444638252258,
"epoch": 3.0325932923948984,
"grad_norm": 12.113971197675227,
"learning_rate": 1.997693767401503e-06,
"logits": -0.6318843960762024,
"logps": -77.62167358398438,
"loss": 0.1364,
"objective": 0.13606974482536316,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6166666746139526,
"regularize": 0.13606974482536316,
"step": 535
},
{
"dpo_loss": 0.5392124056816101,
"epoch": 3.0609352857817664,
"grad_norm": 12.624676506046614,
"learning_rate": 1.9492236680336486e-06,
"logits": -0.6600247621536255,
"logps": -75.87137603759766,
"loss": 0.1305,
"objective": 0.12727196514606476,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.12727196514606476,
"step": 540
},
{
"dpo_loss": 0.5333729982376099,
"epoch": 3.089277279168635,
"grad_norm": 11.94747900474122,
"learning_rate": 1.9009702145406728e-06,
"logits": -0.6088955402374268,
"logps": -76.96524810791016,
"loss": 0.1277,
"objective": 0.12902715802192688,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5583333373069763,
"regularize": 0.12902715802192688,
"step": 545
},
{
"dpo_loss": 0.5270958542823792,
"epoch": 3.117619272555503,
"grad_norm": 12.851758065750987,
"learning_rate": 1.852952387243698e-06,
"logits": -0.5018327832221985,
"logps": -77.49250030517578,
"loss": 0.1257,
"objective": 0.1227855458855629,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6083333492279053,
"regularize": 0.1227855458855629,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.7177569270133972,
"eval_logits": -0.6680934429168701,
"eval_logps": -80.876708984375,
"eval_loss": 0.5853214859962463,
"eval_objective": 0.581609308719635,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.581609308719635,
"eval_runtime": 302.9624,
"eval_samples_per_second": 19.111,
"eval_steps_per_second": 0.799,
"step": 550
},
{
"dpo_loss": 0.520027756690979,
"epoch": 3.1459612659423715,
"grad_norm": 12.026540658203501,
"learning_rate": 1.8051890737811395e-06,
"logits": -0.5085936188697815,
"logps": -77.27413940429688,
"loss": 0.1298,
"objective": 0.13011744618415833,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6291666626930237,
"regularize": 0.13011744618415833,
"step": 555
},
{
"dpo_loss": 0.5274783372879028,
"epoch": 3.1743032593292395,
"grad_norm": 12.49570560811601,
"learning_rate": 1.7576990616793139e-06,
"logits": -0.5597764253616333,
"logps": -74.51499938964844,
"loss": 0.1253,
"objective": 0.12430194020271301,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5708333253860474,
"regularize": 0.12430194020271301,
"step": 560
},
{
"dpo_loss": 0.5323359370231628,
"epoch": 3.2026452527161076,
"grad_norm": 12.00538487916689,
"learning_rate": 1.7105010309624381e-06,
"logits": -0.5895612835884094,
"logps": -76.143310546875,
"loss": 0.1215,
"objective": 0.13054049015045166,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6416666507720947,
"regularize": 0.13054047524929047,
"step": 565
},
{
"dpo_loss": 0.5244373083114624,
"epoch": 3.230987246102976,
"grad_norm": 11.706295916540721,
"learning_rate": 1.6636135468049122e-06,
"logits": -0.5602142810821533,
"logps": -75.6644515991211,
"loss": 0.1165,
"objective": 0.10757040977478027,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.574999988079071,
"regularize": 0.10757040977478027,
"step": 570
},
{
"dpo_loss": 0.5240556001663208,
"epoch": 3.259329239489844,
"grad_norm": 12.017586482980144,
"learning_rate": 1.617055052228768e-06,
"logits": -0.6023640036582947,
"logps": -76.43074798583984,
"loss": 0.1137,
"objective": 0.11454036831855774,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6166666746139526,
"regularize": 0.11454036086797714,
"step": 575
},
{
"dpo_loss": 0.5436674952507019,
"epoch": 3.287671232876712,
"grad_norm": 12.849990562743383,
"learning_rate": 1.5708438608491816e-06,
"logits": -0.6143894195556641,
"logps": -76.68331909179688,
"loss": 0.1196,
"objective": 0.11335032433271408,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6166666746139526,
"regularize": 0.11335031688213348,
"step": 580
},
{
"dpo_loss": 0.5411363244056702,
"epoch": 3.3160132262635806,
"grad_norm": 12.545535124570625,
"learning_rate": 1.524998149670871e-06,
"logits": -0.6247425675392151,
"logps": -77.41455841064453,
"loss": 0.113,
"objective": 0.11495224386453629,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.612500011920929,
"regularize": 0.11495222896337509,
"step": 585
},
{
"dpo_loss": 0.5388593077659607,
"epoch": 3.3443552196504487,
"grad_norm": 12.180484930435734,
"learning_rate": 1.479535951938243e-06,
"logits": -0.6511493921279907,
"logps": -77.2118911743164,
"loss": 0.1101,
"objective": 0.11349861323833466,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5874999761581421,
"regularize": 0.11349860578775406,
"step": 590
},
{
"dpo_loss": 0.5339847207069397,
"epoch": 3.372697213037317,
"grad_norm": 12.019912229417695,
"learning_rate": 1.43447515004208e-06,
"logits": -0.5625333189964294,
"logps": -76.55901336669922,
"loss": 0.1063,
"objective": 0.09969887137413025,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5916666388511658,
"regularize": 0.09969887137413025,
"step": 595
},
{
"dpo_loss": 0.5057954788208008,
"epoch": 3.4010392064241852,
"grad_norm": 12.324261804726659,
"learning_rate": 1.3898334684855647e-06,
"logits": -0.5732893347740173,
"logps": -76.14974975585938,
"loss": 0.1018,
"objective": 0.10744435340166092,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6000000238418579,
"regularize": 0.10744435340166092,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.7155191898345947,
"eval_logits": -0.6519966721534729,
"eval_logps": -80.26307678222656,
"eval_loss": 0.5869894027709961,
"eval_objective": 0.5793389678001404,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.5793389678001404,
"eval_runtime": 304.5422,
"eval_samples_per_second": 19.012,
"eval_steps_per_second": 0.795,
"step": 600
},
{
"dpo_loss": 0.5424041152000427,
"epoch": 3.4293811998110533,
"grad_norm": 12.25686443542734,
"learning_rate": 1.3456284669124159e-06,
"logits": -0.5684671401977539,
"logps": -78.04743194580078,
"loss": 0.1064,
"objective": 0.09537867456674576,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5958333611488342,
"regularize": 0.09537866711616516,
"step": 605
},
{
"dpo_loss": 0.5277451872825623,
"epoch": 3.4577231931979218,
"grad_norm": 11.965563307417655,
"learning_rate": 1.301877533199859e-06,
"logits": -0.5791481733322144,
"logps": -75.86957550048828,
"loss": 0.105,
"objective": 0.09935756027698517,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5916666388511658,
"regularize": 0.09935753792524338,
"step": 610
},
{
"dpo_loss": 0.5237266421318054,
"epoch": 3.48606518658479,
"grad_norm": 11.710547942970107,
"learning_rate": 1.2585978766191726e-06,
"logits": -0.5997554063796997,
"logps": -76.46390533447266,
"loss": 0.0994,
"objective": 0.0968371257185936,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5416666865348816,
"regularize": 0.0968371257185936,
"step": 615
},
{
"dpo_loss": 0.5097241997718811,
"epoch": 3.514407179971658,
"grad_norm": 11.439925552543285,
"learning_rate": 1.2158065210664848e-06,
"logits": -0.5296372175216675,
"logps": -75.6750259399414,
"loss": 0.1027,
"objective": 0.09600085765123367,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5666666626930237,
"regularize": 0.09600085765123367,
"step": 620
},
{
"dpo_loss": 0.5289521813392639,
"epoch": 3.5427491733585263,
"grad_norm": 12.536978726775153,
"learning_rate": 1.1735202983664803e-06,
"logits": -0.570234477519989,
"logps": -74.44934844970703,
"loss": 0.1036,
"objective": 0.10480068624019623,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6041666865348816,
"regularize": 0.10480068624019623,
"step": 625
},
{
"dpo_loss": 0.5382587313652039,
"epoch": 3.5710911667453944,
"grad_norm": 12.482247688036189,
"learning_rate": 1.1317558416516696e-06,
"logits": -0.6371855139732361,
"logps": -74.74573516845703,
"loss": 0.0981,
"objective": 0.08699598163366318,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5874999761581421,
"regularize": 0.08699598163366318,
"step": 630
},
{
"dpo_loss": 0.5292662978172302,
"epoch": 3.5994331601322624,
"grad_norm": 11.724186457910758,
"learning_rate": 1.0905295788197993e-06,
"logits": -0.6215745210647583,
"logps": -75.0141372680664,
"loss": 0.0957,
"objective": 0.09982422739267349,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5666666626930237,
"regularize": 0.09982422739267349,
"step": 635
},
{
"dpo_loss": 0.5090000033378601,
"epoch": 3.627775153519131,
"grad_norm": 12.2959167635157,
"learning_rate": 1.049857726072005e-06,
"logits": -0.5938432812690735,
"logps": -76.37276458740234,
"loss": 0.0957,
"objective": 0.09877195209264755,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5416666865348816,
"regularize": 0.09877195209264755,
"step": 640
},
{
"dpo_loss": 0.5198237299919128,
"epoch": 3.656117146905999,
"grad_norm": 11.877643838835896,
"learning_rate": 1.0097562815342215e-06,
"logits": -0.6002366542816162,
"logps": -74.8946762084961,
"loss": 0.0894,
"objective": 0.0859779417514801,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6083333492279053,
"regularize": 0.0859779417514801,
"step": 645
},
{
"dpo_loss": 0.5422871708869934,
"epoch": 3.6844591402928675,
"grad_norm": 12.290572025250485,
"learning_rate": 9.702410189643838e-07,
"logits": -0.6051768660545349,
"logps": -75.9346694946289,
"loss": 0.0908,
"objective": 0.0903468057513237,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5666666626930237,
"regularize": 0.0903467983007431,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.7142183780670166,
"eval_logits": -0.6950010061264038,
"eval_logps": -80.09378051757812,
"eval_loss": 0.5846312046051025,
"eval_objective": 0.575107753276825,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5309917330741882,
"eval_regularize": 0.575107753276825,
"eval_runtime": 311.7236,
"eval_samples_per_second": 18.574,
"eval_steps_per_second": 0.776,
"step": 650
},
{
"dpo_loss": 0.5320921540260315,
"epoch": 3.7128011336797355,
"grad_norm": 12.073732647268816,
"learning_rate": 9.313274815478698e-07,
"logits": -0.5923182964324951,
"logps": -75.15804290771484,
"loss": 0.0911,
"objective": 0.08135481178760529,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5958333611488342,
"regularize": 0.0813547894358635,
"step": 655
},
{
"dpo_loss": 0.5302092432975769,
"epoch": 3.7411431270666036,
"grad_norm": 12.119943535015905,
"learning_rate": 8.930309757836517e-07,
"logits": -0.6394330859184265,
"logps": -76.03792572021484,
"loss": 0.0872,
"objective": 0.08334127813577652,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6333333253860474,
"regularize": 0.08334127813577652,
"step": 660
},
{
"dpo_loss": 0.5253182649612427,
"epoch": 3.769485120453472,
"grad_norm": 12.163074542006058,
"learning_rate": 8.553665654635343e-07,
"logits": -0.6006038188934326,
"logps": -75.9883804321289,
"loss": 0.0854,
"objective": 0.08219381421804428,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6000000238418579,
"regularize": 0.08219381421804428,
"step": 665
},
{
"dpo_loss": 0.5255737900733948,
"epoch": 3.79782711384034,
"grad_norm": 12.572774020559422,
"learning_rate": 8.183490657468687e-07,
"logits": -0.6632742285728455,
"logps": -76.2689437866211,
"loss": 0.0866,
"objective": 0.08637838065624237,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.6833333373069763,
"regularize": 0.08637836575508118,
"step": 670
},
{
"dpo_loss": 0.5349328517913818,
"epoch": 3.826169107227208,
"grad_norm": 11.892804895505401,
"learning_rate": 7.819930373330669e-07,
"logits": -0.6208025813102722,
"logps": -75.36246490478516,
"loss": 0.0791,
"objective": 0.07929237186908722,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.612500011920929,
"regularize": 0.07929236441850662,
"step": 675
},
{
"dpo_loss": 0.54377681016922,
"epoch": 3.8545111006140766,
"grad_norm": 11.851078631402974,
"learning_rate": 7.463127807341966e-07,
"logits": -0.5686856508255005,
"logps": -75.87733459472656,
"loss": 0.0851,
"objective": 0.08981513231992722,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5791666507720947,
"regularize": 0.08981513231992722,
"step": 680
},
{
"dpo_loss": 0.5212653279304504,
"epoch": 3.8828530940009447,
"grad_norm": 12.638205294617679,
"learning_rate": 7.113223306499336e-07,
"logits": -0.5982258319854736,
"logps": -75.38506317138672,
"loss": 0.086,
"objective": 0.07463731616735458,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6000000238418579,
"regularize": 0.07463730871677399,
"step": 685
},
{
"dpo_loss": 0.5221771001815796,
"epoch": 3.9111950873878127,
"grad_norm": 11.687391766152553,
"learning_rate": 6.770354504470575e-07,
"logits": -0.6342039108276367,
"logps": -75.1083755493164,
"loss": 0.0781,
"objective": 0.07596276700496674,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6458333134651184,
"regularize": 0.07596276700496674,
"step": 690
},
{
"dpo_loss": 0.5347076654434204,
"epoch": 3.9395370807746812,
"grad_norm": 11.88312797363931,
"learning_rate": 6.434656267456843e-07,
"logits": -0.5775099992752075,
"logps": -76.01984405517578,
"loss": 0.0768,
"objective": 0.0815543457865715,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5375000238418579,
"regularize": 0.0815543457865715,
"step": 695
},
{
"dpo_loss": 0.5384576916694641,
"epoch": 3.9678790741615493,
"grad_norm": 11.978357414624993,
"learning_rate": 6.106260641143547e-07,
"logits": -0.5876869559288025,
"logps": -76.2168960571289,
"loss": 0.0782,
"objective": 0.08425504714250565,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5791666507720947,
"regularize": 0.08425504714250565,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.7158195376396179,
"eval_logits": -0.6694127321243286,
"eval_logps": -80.57975769042969,
"eval_loss": 0.5832462906837463,
"eval_objective": 0.5775305032730103,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.5775305032730103,
"eval_runtime": 303.6208,
"eval_samples_per_second": 19.07,
"eval_steps_per_second": 0.797,
"step": 700
},
{
"dpo_loss": 0.5220022797584534,
"epoch": 3.9962210675484178,
"grad_norm": 11.865443583379125,
"learning_rate": 5.785296798760601e-07,
"logits": -0.5430007576942444,
"logps": -74.90709686279297,
"loss": 0.0746,
"objective": 0.07280976325273514,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.612500011920929,
"regularize": 0.07280976325273514,
"step": 705
},
{
"dpo_loss": 0.5136677026748657,
"epoch": 4.024563060935286,
"grad_norm": 12.91211905572677,
"learning_rate": 5.471890990272666e-07,
"logits": -0.5883128643035889,
"logps": -76.31259155273438,
"loss": 0.0646,
"objective": 0.06166737899184227,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.06166737526655197,
"step": 710
},
{
"dpo_loss": 0.5278608202934265,
"epoch": 4.052905054322154,
"grad_norm": 12.122689089246824,
"learning_rate": 5.166166492719124e-07,
"logits": -0.5741557478904724,
"logps": -75.61717987060547,
"loss": 0.0617,
"objective": 0.060495439916849136,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5916666388511658,
"regularize": 0.06049543619155884,
"step": 715
},
{
"dpo_loss": 0.5187323093414307,
"epoch": 4.081247047709022,
"grad_norm": 11.560808093959789,
"learning_rate": 4.868243561723535e-07,
"logits": -0.5206624865531921,
"logps": -78.19841003417969,
"loss": 0.0635,
"objective": 0.06401447206735611,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.06401447206735611,
"step": 720
},
{
"dpo_loss": 0.540945827960968,
"epoch": 4.109589041095891,
"grad_norm": 11.843737418563835,
"learning_rate": 4.57823938419153e-07,
"logits": -0.5632360577583313,
"logps": -75.70304107666016,
"loss": 0.0616,
"objective": 0.06045746058225632,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5874999761581421,
"regularize": 0.06045745685696602,
"step": 725
},
{
"dpo_loss": 0.5236831903457642,
"epoch": 4.137931034482759,
"grad_norm": 12.096865413097825,
"learning_rate": 4.2962680322157335e-07,
"logits": -0.6435456871986389,
"logps": -76.09138488769531,
"loss": 0.061,
"objective": 0.059013016521930695,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.059013016521930695,
"step": 730
},
{
"dpo_loss": 0.533221423625946,
"epoch": 4.166273027869627,
"grad_norm": 12.657248993273553,
"learning_rate": 4.0224404182059443e-07,
"logits": -0.5540097951889038,
"logps": -77.43226623535156,
"loss": 0.0556,
"objective": 0.05801505967974663,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6166666746139526,
"regularize": 0.05801505222916603,
"step": 735
},
{
"dpo_loss": 0.5072778463363647,
"epoch": 4.194615021256495,
"grad_norm": 12.763432211885254,
"learning_rate": 3.756864251262143e-07,
"logits": -0.5889293551445007,
"logps": -76.94921875,
"loss": 0.0586,
"objective": 0.06059374660253525,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6083333492279053,
"regularize": 0.06059373542666435,
"step": 740
},
{
"dpo_loss": 0.5195721387863159,
"epoch": 4.222957014643363,
"grad_norm": 13.011382942960159,
"learning_rate": 3.499643994807486e-07,
"logits": -0.636326789855957,
"logps": -74.38599395751953,
"loss": 0.0567,
"objective": 0.06092626973986626,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5708333253860474,
"regularize": 0.06092626228928566,
"step": 745
},
{
"dpo_loss": 0.5143676996231079,
"epoch": 4.251299008030231,
"grad_norm": 12.170871057268412,
"learning_rate": 3.250880825498026e-07,
"logits": -0.6765701770782471,
"logps": -76.2116928100586,
"loss": 0.0573,
"objective": 0.05863998085260391,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6000000238418579,
"regularize": 0.05863996967673302,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.7155545949935913,
"eval_logits": -0.6764047145843506,
"eval_logps": -80.59185791015625,
"eval_loss": 0.5846724510192871,
"eval_objective": 0.5774604678153992,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.5774604678153992,
"eval_runtime": 306.7663,
"eval_samples_per_second": 18.874,
"eval_steps_per_second": 0.789,
"step": 750
},
{
"dpo_loss": 0.5219383239746094,
"epoch": 4.2796410014171,
"grad_norm": 12.194092389260657,
"learning_rate": 3.0106725934252095e-07,
"logits": -0.6311543583869934,
"logps": -75.78201293945312,
"loss": 0.0583,
"objective": 0.05387051776051521,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6083333492279053,
"regularize": 0.053870514035224915,
"step": 755
},
{
"dpo_loss": 0.5232734084129333,
"epoch": 4.307982994803968,
"grad_norm": 11.576901998049738,
"learning_rate": 2.779113783626916e-07,
"logits": -0.609634280204773,
"logps": -76.47386932373047,
"loss": 0.0558,
"objective": 0.04752067103981972,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6458333134651184,
"regularize": 0.04752066358923912,
"step": 760
},
{
"dpo_loss": 0.5060269832611084,
"epoch": 4.336324988190836,
"grad_norm": 11.974550505334834,
"learning_rate": 2.5562954789221164e-07,
"logits": -0.6198402047157288,
"logps": -76.60380554199219,
"loss": 0.0546,
"objective": 0.048433538526296616,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6416666507720947,
"regularize": 0.04843352735042572,
"step": 765
},
{
"dpo_loss": 0.5125846862792969,
"epoch": 4.364666981577704,
"grad_norm": 12.13893132092522,
"learning_rate": 2.3423053240837518e-07,
"logits": -0.5853985548019409,
"logps": -75.1782455444336,
"loss": 0.0577,
"objective": 0.05432061105966568,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5958333611488342,
"regularize": 0.054320596158504486,
"step": 770
},
{
"dpo_loss": 0.5186619162559509,
"epoch": 4.393008974964572,
"grad_norm": 12.025324933873947,
"learning_rate": 2.137227491364016e-07,
"logits": -0.5982651114463806,
"logps": -76.23176574707031,
"loss": 0.0524,
"objective": 0.05267190933227539,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6000000238418579,
"regularize": 0.05267190933227539,
"step": 775
},
{
"dpo_loss": 0.5261180400848389,
"epoch": 4.42135096835144,
"grad_norm": 11.779490213275317,
"learning_rate": 1.941142647385469e-07,
"logits": -0.6180116534233093,
"logps": -74.97576141357422,
"loss": 0.052,
"objective": 0.04746713861823082,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5708333253860474,
"regularize": 0.04746713116765022,
"step": 780
},
{
"dpo_loss": 0.5437088012695312,
"epoch": 4.449692961738309,
"grad_norm": 12.241647561755466,
"learning_rate": 1.7541279214111277e-07,
"logits": -0.6545938849449158,
"logps": -75.04468536376953,
"loss": 0.0549,
"objective": 0.06091846525669098,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5958333611488342,
"regularize": 0.06091845780611038,
"step": 785
},
{
"dpo_loss": 0.5158849358558655,
"epoch": 4.478034955125177,
"grad_norm": 12.172553863147998,
"learning_rate": 1.5762568750059604e-07,
"logits": -0.6263752579689026,
"logps": -77.42415618896484,
"loss": 0.0488,
"objective": 0.050752636045217514,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.637499988079071,
"regularize": 0.05075262859463692,
"step": 790
},
{
"dpo_loss": 0.523824155330658,
"epoch": 4.506376948512045,
"grad_norm": 11.642533263792576,
"learning_rate": 1.4075994731016895e-07,
"logits": -0.5398118495941162,
"logps": -77.37626647949219,
"loss": 0.0497,
"objective": 0.049972303211688995,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5916666388511658,
"regularize": 0.0499722920358181,
"step": 795
},
{
"dpo_loss": 0.5201839804649353,
"epoch": 4.534718941898913,
"grad_norm": 12.033841229595977,
"learning_rate": 1.2482220564763669e-07,
"logits": -0.5231271982192993,
"logps": -76.26326751708984,
"loss": 0.0513,
"objective": 0.047722525894641876,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6166666746139526,
"regularize": 0.04772252216935158,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.7149068713188171,
"eval_logits": -0.680606484413147,
"eval_logps": -80.50379943847656,
"eval_loss": 0.5835468769073486,
"eval_objective": 0.5758251547813416,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.5758251547813416,
"eval_runtime": 309.1293,
"eval_samples_per_second": 18.73,
"eval_steps_per_second": 0.783,
"step": 800
},
{
"dpo_loss": 0.5226713418960571,
"epoch": 4.563060935285781,
"grad_norm": 11.85308064151459,
"learning_rate": 1.0981873156594381e-07,
"logits": -0.5926477909088135,
"logps": -75.56033325195312,
"loss": 0.0501,
"objective": 0.04832206293940544,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5583333373069763,
"regularize": 0.04832205921411514,
"step": 805
},
{
"dpo_loss": 0.5298218727111816,
"epoch": 4.59140292867265,
"grad_norm": 11.585229252604742,
"learning_rate": 9.575542662726756e-08,
"logits": -0.6329571008682251,
"logps": -74.88050079345703,
"loss": 0.0486,
"objective": 0.04479978233575821,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.5249999761581421,
"regularize": 0.04479977861046791,
"step": 810
},
{
"dpo_loss": 0.5306591391563416,
"epoch": 4.619744922059518,
"grad_norm": 11.532261417058377,
"learning_rate": 8.26378225816582e-08,
"logits": -0.5220829844474792,
"logps": -76.33390045166016,
"loss": 0.049,
"objective": 0.04780552163720131,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6791666746139526,
"regularize": 0.047805510461330414,
"step": 815
},
{
"dpo_loss": 0.5376265048980713,
"epoch": 4.648086915446386,
"grad_norm": 11.844210455280477,
"learning_rate": 7.047107919114588e-08,
"logits": -0.6030393838882446,
"logps": -76.0525131225586,
"loss": 0.0493,
"objective": 0.04802839830517769,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6166666746139526,
"regularize": 0.04802839085459709,
"step": 820
},
{
"dpo_loss": 0.537010133266449,
"epoch": 4.6764289088332545,
"grad_norm": 11.420139730897464,
"learning_rate": 5.92599822001666e-08,
"logits": -0.5595048666000366,
"logps": -74.4270248413086,
"loss": 0.0478,
"objective": 0.04515855759382248,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5458333492279053,
"regularize": 0.04515855386853218,
"step": 825
},
{
"dpo_loss": 0.5216527581214905,
"epoch": 4.7047709022201225,
"grad_norm": 11.727721968209657,
"learning_rate": 4.9008941453107527e-08,
"logits": -0.6450707912445068,
"logps": -76.4205322265625,
"loss": 0.0531,
"objective": 0.05846472084522247,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 0.058464717119932175,
"step": 830
},
{
"dpo_loss": 0.510058581829071,
"epoch": 4.733112895606991,
"grad_norm": 11.762662946153132,
"learning_rate": 3.972198915970976e-08,
"logits": -0.5860788226127625,
"logps": -76.25663757324219,
"loss": 0.0512,
"objective": 0.04887576773762703,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5541666746139526,
"regularize": 0.048875752836465836,
"step": 835
},
{
"dpo_loss": 0.5002175569534302,
"epoch": 4.7614548889938595,
"grad_norm": 12.325128711177749,
"learning_rate": 3.1402778309014284e-08,
"logits": -0.5927218198776245,
"logps": -76.40971374511719,
"loss": 0.0481,
"objective": 0.04862005263566971,
"ranking_idealized": 0.6208333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.612500011920929,
"regularize": 0.04862004518508911,
"step": 840
},
{
"dpo_loss": 0.532789945602417,
"epoch": 4.7897968823807275,
"grad_norm": 11.538897448416794,
"learning_rate": 2.4054581232470785e-08,
"logits": -0.5980377793312073,
"logps": -76.28124237060547,
"loss": 0.0459,
"objective": 0.0512334480881691,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.574999988079071,
"regularize": 0.0512334480881691,
"step": 845
},
{
"dpo_loss": 0.5221417546272278,
"epoch": 4.818138875767596,
"grad_norm": 11.64226324346139,
"learning_rate": 1.768028831677926e-08,
"logits": -0.5999422073364258,
"logps": -75.14291381835938,
"loss": 0.0447,
"objective": 0.039600715041160583,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6166666746139526,
"regularize": 0.03960070386528969,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.7150014042854309,
"eval_logits": -0.680656373500824,
"eval_logps": -80.54595184326172,
"eval_loss": 0.5835261940956116,
"eval_objective": 0.5760576128959656,
"eval_ranking_idealized": 0.5888429880142212,
"eval_ranking_idealized_expo": 0.5103305578231812,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.5760576128959656,
"eval_runtime": 308.7978,
"eval_samples_per_second": 18.75,
"eval_steps_per_second": 0.784,
"step": 850
},
{
"dpo_loss": 0.5369904637336731,
"epoch": 4.846480869154464,
"grad_norm": 11.853216981436601,
"learning_rate": 1.2282406866966078e-08,
"logits": -0.5668095946311951,
"logps": -75.38987731933594,
"loss": 0.0458,
"objective": 0.04888928309082985,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6333333253860474,
"regularize": 0.048889271914958954,
"step": 855
},
{
"dpo_loss": 0.5126673579216003,
"epoch": 4.874822862541333,
"grad_norm": 12.066886758890655,
"learning_rate": 7.863060120144316e-09,
"logits": -0.5576753616333008,
"logps": -75.38652038574219,
"loss": 0.0467,
"objective": 0.042654138058423996,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6541666388511658,
"regularize": 0.0426541231572628,
"step": 860
},
{
"dpo_loss": 0.5163237452507019,
"epoch": 4.903164855928201,
"grad_norm": 11.881483274725076,
"learning_rate": 4.423986410346526e-09,
"logits": -0.6045090556144714,
"logps": -74.37069702148438,
"loss": 0.0434,
"objective": 0.04223904386162758,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5958333611488342,
"regularize": 0.042239028960466385,
"step": 865
},
{
"dpo_loss": 0.5167383551597595,
"epoch": 4.931506849315069,
"grad_norm": 11.995382070296834,
"learning_rate": 1.9665384847583622e-09,
"logits": -0.6181827783584595,
"logps": -76.19407653808594,
"loss": 0.0445,
"objective": 0.04629291966557503,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6041666865348816,
"regularize": 0.04629291221499443,
"step": 870
},
{
"dpo_loss": 0.5252609252929688,
"epoch": 4.959848842701937,
"grad_norm": 11.846766301958432,
"learning_rate": 4.916829716183901e-10,
"logits": -0.5762243866920471,
"logps": -75.8907241821289,
"loss": 0.0472,
"objective": 0.04822330176830292,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.612500011920929,
"regularize": 0.04822329804301262,
"step": 875
},
{
"dpo_loss": 0.5235641598701477,
"epoch": 4.988190836088805,
"grad_norm": 11.641506789650348,
"learning_rate": 0.0,
"logits": -0.6528417468070984,
"logps": -76.5003433227539,
"loss": 0.0462,
"objective": 0.047958675771951675,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.625,
"regularize": 0.04795865714550018,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.1997517315332185,
"train_runtime": 41462.5211,
"train_samples_per_second": 6.126,
"train_steps_per_second": 0.021
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}