hZzy's picture
Model save
fbcf74a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 134.13611854511257,
"learning_rate": 1.1363636363636363e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6930932998657227,
"epoch": 0.02834199338686821,
"grad_norm": 133.21235814006465,
"learning_rate": 5.6818181818181815e-08,
"logits": -1.3678579330444336,
"logps": -84.42462921142578,
"loss": 0.4128,
"objective": 0.3755575716495514,
"ranking_idealized": 0.5677083134651184,
"ranking_idealized_expo": 0.546875,
"ranking_simple": 0.546875,
"regularize": 0.3755575716495514,
"step": 5
},
{
"dpo_loss": 0.6867414116859436,
"epoch": 0.05668398677373642,
"grad_norm": 142.75690820723997,
"learning_rate": 1.1363636363636363e-07,
"logits": -1.4464198350906372,
"logps": -83.337890625,
"loss": 0.4131,
"objective": 0.43237432837486267,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.43237432837486267,
"step": 10
},
{
"dpo_loss": 0.6844848990440369,
"epoch": 0.08502598016060463,
"grad_norm": 134.06873444315383,
"learning_rate": 1.7045454545454543e-07,
"logits": -1.4124974012374878,
"logps": -83.50977325439453,
"loss": 0.4134,
"objective": 0.3983347713947296,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.3983347713947296,
"step": 15
},
{
"dpo_loss": 0.6885910034179688,
"epoch": 0.11336797354747284,
"grad_norm": 128.26617155521478,
"learning_rate": 2.2727272727272726e-07,
"logits": -1.4028390645980835,
"logps": -84.32319641113281,
"loss": 0.396,
"objective": 0.40446150302886963,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.40446150302886963,
"step": 20
},
{
"dpo_loss": 0.664176881313324,
"epoch": 0.14170996693434104,
"grad_norm": 129.31399976570776,
"learning_rate": 2.840909090909091e-07,
"logits": -1.4484522342681885,
"logps": -84.15487670898438,
"loss": 0.3876,
"objective": 0.3946884274482727,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.3946884274482727,
"step": 25
},
{
"dpo_loss": 0.6644570827484131,
"epoch": 0.17005196032120926,
"grad_norm": 129.8647460663128,
"learning_rate": 3.4090909090909085e-07,
"logits": -1.4183248281478882,
"logps": -84.92523956298828,
"loss": 0.3768,
"objective": 0.38247305154800415,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.550000011920929,
"regularize": 0.38247305154800415,
"step": 30
},
{
"dpo_loss": 0.6450185775756836,
"epoch": 0.19839395370807747,
"grad_norm": 130.99553488282152,
"learning_rate": 3.977272727272727e-07,
"logits": -1.3883568048477173,
"logps": -83.79637145996094,
"loss": 0.3692,
"objective": 0.3310621976852417,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5041666626930237,
"regularize": 0.3310621976852417,
"step": 35
},
{
"dpo_loss": 0.6378442049026489,
"epoch": 0.22673594709494568,
"grad_norm": 124.69353315131622,
"learning_rate": 4.545454545454545e-07,
"logits": -1.3773659467697144,
"logps": -83.89529418945312,
"loss": 0.3685,
"objective": 0.3828122019767761,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5249999761581421,
"regularize": 0.3828122019767761,
"step": 40
},
{
"dpo_loss": 0.6174018383026123,
"epoch": 0.25507794048181387,
"grad_norm": 131.46158666021728,
"learning_rate": 5.113636363636363e-07,
"logits": -1.4973019361495972,
"logps": -85.6716537475586,
"loss": 0.3654,
"objective": 0.4057728052139282,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.550000011920929,
"regularize": 0.40577277541160583,
"step": 45
},
{
"dpo_loss": 0.6175411343574524,
"epoch": 0.2834199338686821,
"grad_norm": 120.92405538498733,
"learning_rate": 5.681818181818182e-07,
"logits": -1.3952524662017822,
"logps": -83.97753143310547,
"loss": 0.3563,
"objective": 0.3284095227718353,
"ranking_idealized": 0.4416666626930237,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.4333333373069763,
"regularize": 0.3284095227718353,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.687321662902832,
"eval_logits": -1.420853853225708,
"eval_logps": -92.33389282226562,
"eval_loss": 0.42110419273376465,
"eval_objective": 0.42802950739860535,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 0.42802950739860535,
"eval_runtime": 269.5653,
"eval_samples_per_second": 21.479,
"eval_steps_per_second": 0.898,
"step": 50
},
{
"dpo_loss": 0.6299411058425903,
"epoch": 0.3117619272555503,
"grad_norm": 143.5714789398323,
"learning_rate": 6.249999999999999e-07,
"logits": -1.4425883293151855,
"logps": -86.13849639892578,
"loss": 0.3849,
"objective": 0.3963293433189392,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 0.3963293433189392,
"step": 55
},
{
"dpo_loss": 0.6125720143318176,
"epoch": 0.3401039206424185,
"grad_norm": 136.6167401869809,
"learning_rate": 6.818181818181817e-07,
"logits": -1.4236023426055908,
"logps": -84.95130157470703,
"loss": 0.3788,
"objective": 0.3742086589336395,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5208333134651184,
"regularize": 0.3742086589336395,
"step": 60
},
{
"dpo_loss": 0.6023539900779724,
"epoch": 0.3684459140292867,
"grad_norm": 145.32268284333037,
"learning_rate": 7.386363636363636e-07,
"logits": -1.3705960512161255,
"logps": -85.3038330078125,
"loss": 0.3824,
"objective": 0.3741196095943451,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.512499988079071,
"regularize": 0.3741196095943451,
"step": 65
},
{
"dpo_loss": 0.6065691709518433,
"epoch": 0.39678790741615494,
"grad_norm": 139.19992394086944,
"learning_rate": 7.954545454545454e-07,
"logits": -1.448442816734314,
"logps": -86.62315368652344,
"loss": 0.3952,
"objective": 0.4089803397655487,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.4089803397655487,
"step": 70
},
{
"dpo_loss": 0.5920066833496094,
"epoch": 0.42512990080302315,
"grad_norm": 124.21133200378506,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4492307901382446,
"logps": -86.64204406738281,
"loss": 0.4203,
"objective": 0.4280329644680023,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5541666746139526,
"regularize": 0.4280329644680023,
"step": 75
},
{
"dpo_loss": 0.5960291028022766,
"epoch": 0.45347189418989137,
"grad_norm": 189.2274993381048,
"learning_rate": 9.09090909090909e-07,
"logits": -1.4888454675674438,
"logps": -86.4117431640625,
"loss": 0.4188,
"objective": 0.41209739446640015,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.512499988079071,
"regularize": 0.41209739446640015,
"step": 80
},
{
"dpo_loss": 0.5794143676757812,
"epoch": 0.4818138875767596,
"grad_norm": 135.1857661271161,
"learning_rate": 9.65909090909091e-07,
"logits": -1.449118971824646,
"logps": -85.0038833618164,
"loss": 0.4453,
"objective": 0.4282515347003937,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5333333611488342,
"regularize": 0.4282515347003937,
"step": 85
},
{
"dpo_loss": 0.5998800992965698,
"epoch": 0.5101558809636277,
"grad_norm": 176.339935388579,
"learning_rate": 9.999842657116664e-07,
"logits": -1.262819766998291,
"logps": -84.80656433105469,
"loss": 0.4721,
"objective": 0.4337444007396698,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.4337444007396698,
"step": 90
},
{
"dpo_loss": 0.5696186423301697,
"epoch": 0.538497874350496,
"grad_norm": 126.57058201712371,
"learning_rate": 9.998072663403656e-07,
"logits": -1.356713056564331,
"logps": -84.7969741821289,
"loss": 0.4657,
"objective": 0.49121516942977905,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5625,
"regularize": 0.49121516942977905,
"step": 95
},
{
"dpo_loss": 0.5877792835235596,
"epoch": 0.5668398677373642,
"grad_norm": 132.17672172143168,
"learning_rate": 9.99433669591504e-07,
"logits": -1.4267613887786865,
"logps": -85.57508850097656,
"loss": 0.4749,
"objective": 0.49508702754974365,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5375000238418579,
"regularize": 0.49508702754974365,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6957917213439941,
"eval_logits": -1.3981915712356567,
"eval_logps": -91.31111907958984,
"eval_loss": 0.46728843450546265,
"eval_objective": 0.4711840748786926,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 0.4711840748786926,
"eval_runtime": 258.6905,
"eval_samples_per_second": 22.382,
"eval_steps_per_second": 0.935,
"step": 100
},
{
"dpo_loss": 0.5984219908714294,
"epoch": 0.5951818611242324,
"grad_norm": 141.88949705555274,
"learning_rate": 9.988636224180095e-07,
"logits": -1.3065966367721558,
"logps": -85.25997924804688,
"loss": 0.4971,
"objective": 0.5261873602867126,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.5261873602867126,
"step": 105
},
{
"dpo_loss": 0.5468531847000122,
"epoch": 0.6235238545111006,
"grad_norm": 118.1204631787259,
"learning_rate": 9.980973490458728e-07,
"logits": -1.4602758884429932,
"logps": -83.59262084960938,
"loss": 0.4868,
"objective": 0.4539487659931183,
"ranking_idealized": 0.47083333134651184,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.47083333134651184,
"regularize": 0.4539487659931183,
"step": 110
},
{
"dpo_loss": 0.595252275466919,
"epoch": 0.6518658478979689,
"grad_norm": 125.27205037996261,
"learning_rate": 9.971351508859486e-07,
"logits": -1.4180139303207397,
"logps": -83.7674560546875,
"loss": 0.5199,
"objective": 0.49929893016815186,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.49929893016815186,
"step": 115
},
{
"dpo_loss": 0.6112563014030457,
"epoch": 0.680207841284837,
"grad_norm": 115.02330529438134,
"learning_rate": 9.959774064153975e-07,
"logits": -1.3522943258285522,
"logps": -84.06304168701172,
"loss": 0.5115,
"objective": 0.5348313450813293,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5375000238418579,
"regularize": 0.5348313450813293,
"step": 120
},
{
"dpo_loss": 0.5981053709983826,
"epoch": 0.7085498346717053,
"grad_norm": 125.9955613878692,
"learning_rate": 9.94624571028813e-07,
"logits": -1.3196688890457153,
"logps": -82.79936981201172,
"loss": 0.5262,
"objective": 0.5262423157691956,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5666666626930237,
"regularize": 0.5262423157691956,
"step": 125
},
{
"dpo_loss": 0.5788747668266296,
"epoch": 0.7368918280585735,
"grad_norm": 127.79997373259253,
"learning_rate": 9.930771768590933e-07,
"logits": -1.4446656703948975,
"logps": -82.71467590332031,
"loss": 0.5057,
"objective": 0.5254269242286682,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.47083333134651184,
"regularize": 0.5254269242286682,
"step": 130
},
{
"dpo_loss": 0.5741093754768372,
"epoch": 0.7652338214454416,
"grad_norm": 119.65644724778777,
"learning_rate": 9.91335832568129e-07,
"logits": -1.412257432937622,
"logps": -85.09854125976562,
"loss": 0.5219,
"objective": 0.5582370758056641,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5416666865348816,
"regularize": 0.5582370758056641,
"step": 135
},
{
"dpo_loss": 0.5746143460273743,
"epoch": 0.7935758148323099,
"grad_norm": 117.38563727058376,
"learning_rate": 9.894012231073895e-07,
"logits": -1.307041049003601,
"logps": -83.801025390625,
"loss": 0.5804,
"objective": 0.5208708643913269,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5291666388511658,
"regularize": 0.5208708643913269,
"step": 140
},
{
"dpo_loss": 0.5846189856529236,
"epoch": 0.821917808219178,
"grad_norm": 130.2534703806674,
"learning_rate": 9.872741094484964e-07,
"logits": -1.3578908443450928,
"logps": -85.40940856933594,
"loss": 0.5595,
"objective": 0.5616883635520935,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.48750001192092896,
"regularize": 0.5616883635520935,
"step": 145
},
{
"dpo_loss": 0.5902164578437805,
"epoch": 0.8502598016060463,
"grad_norm": 124.65999306889285,
"learning_rate": 9.849553282839024e-07,
"logits": -1.3182071447372437,
"logps": -83.66584014892578,
"loss": 0.5468,
"objective": 0.5547689199447632,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5333333611488342,
"regularize": 0.5547689199447632,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.7061101198196411,
"eval_logits": -1.3204107284545898,
"eval_logps": -91.07585144042969,
"eval_loss": 0.5596091151237488,
"eval_objective": 0.5683301687240601,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 0.5683301687240601,
"eval_runtime": 262.2295,
"eval_samples_per_second": 22.08,
"eval_steps_per_second": 0.923,
"step": 150
},
{
"dpo_loss": 0.6094241738319397,
"epoch": 0.8786017949929145,
"grad_norm": 120.42788649324545,
"learning_rate": 9.824457916977784e-07,
"logits": -1.3233551979064941,
"logps": -83.14967346191406,
"loss": 0.5545,
"objective": 0.5938875675201416,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5416666865348816,
"regularize": 0.5938875675201416,
"step": 155
},
{
"dpo_loss": 0.5945659875869751,
"epoch": 0.9069437883797827,
"grad_norm": 130.17538489613264,
"learning_rate": 9.797464868072486e-07,
"logits": -1.2508231401443481,
"logps": -84.22561645507812,
"loss": 0.5582,
"objective": 0.5726324319839478,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.5726324319839478,
"step": 160
},
{
"dpo_loss": 0.5802300572395325,
"epoch": 0.9352857817666509,
"grad_norm": 108.46916479196223,
"learning_rate": 9.768584753741134e-07,
"logits": -1.2593294382095337,
"logps": -84.2457046508789,
"loss": 0.5428,
"objective": 0.5054454207420349,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.550000011920929,
"regularize": 0.5054454207420349,
"step": 165
},
{
"dpo_loss": 0.5963813066482544,
"epoch": 0.9636277751535192,
"grad_norm": 112.45672430142012,
"learning_rate": 9.737828933872073e-07,
"logits": -1.3053935766220093,
"logps": -84.02145385742188,
"loss": 0.5633,
"objective": 0.5686451196670532,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5333333611488342,
"regularize": 0.5686451196670532,
"step": 170
},
{
"dpo_loss": 0.6094807982444763,
"epoch": 0.9919697685403873,
"grad_norm": 112.35460354829323,
"learning_rate": 9.705209506155634e-07,
"logits": -1.2585358619689941,
"logps": -84.55461883544922,
"loss": 0.5613,
"objective": 0.567017674446106,
"ranking_idealized": 0.5958333611488342,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5874999761581421,
"regularize": 0.567017674446106,
"step": 175
},
{
"dpo_loss": 0.5619717836380005,
"epoch": 1.0203117619272555,
"grad_norm": 111.48678467404953,
"learning_rate": 9.670739301325534e-07,
"logits": -1.4066673517227173,
"logps": -83.15278625488281,
"loss": 0.567,
"objective": 0.633197009563446,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5416666865348816,
"regularize": 0.633197009563446,
"step": 180
},
{
"dpo_loss": 0.5666179656982422,
"epoch": 1.0486537553141237,
"grad_norm": 131.38985186167173,
"learning_rate": 9.63443187811197e-07,
"logits": -1.2431377172470093,
"logps": -83.49596405029297,
"loss": 0.5655,
"objective": 0.5990104079246521,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.512499988079071,
"regularize": 0.5990104079246521,
"step": 185
},
{
"dpo_loss": 0.5910226106643677,
"epoch": 1.076995748700992,
"grad_norm": 121.16034753219803,
"learning_rate": 9.596301517908328e-07,
"logits": -1.3014639616012573,
"logps": -85.08562469482422,
"loss": 0.5836,
"objective": 0.6070137023925781,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6070137023925781,
"step": 190
},
{
"dpo_loss": 0.6265178918838501,
"epoch": 1.10533774208786,
"grad_norm": 113.24543776185278,
"learning_rate": 9.556363219153662e-07,
"logits": -1.2846567630767822,
"logps": -83.17445373535156,
"loss": 0.5509,
"objective": 0.5626471638679504,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5833333134651184,
"regularize": 0.5626471638679504,
"step": 195
},
{
"dpo_loss": 0.5952022671699524,
"epoch": 1.1336797354747283,
"grad_norm": 109.3282186534433,
"learning_rate": 9.514632691433106e-07,
"logits": -1.3481907844543457,
"logps": -82.64450073242188,
"loss": 0.5501,
"objective": 0.5247953534126282,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 0.5247953534126282,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.7155318856239319,
"eval_logits": -1.3003575801849365,
"eval_logps": -89.7054672241211,
"eval_loss": 0.6048148274421692,
"eval_objective": 0.5995473861694336,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5061983466148376,
"eval_regularize": 0.5995473861694336,
"eval_runtime": 259.4012,
"eval_samples_per_second": 22.321,
"eval_steps_per_second": 0.933,
"step": 200
},
{
"dpo_loss": 0.5928524136543274,
"epoch": 1.1620217288615966,
"grad_norm": 117.4507736272679,
"learning_rate": 9.471126349298556e-07,
"logits": -1.304435133934021,
"logps": -83.2208480834961,
"loss": 0.5617,
"objective": 0.569757878780365,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5041666626930237,
"regularize": 0.569757878780365,
"step": 205
},
{
"dpo_loss": 0.5430352687835693,
"epoch": 1.1903637222484649,
"grad_norm": 108.28365774149364,
"learning_rate": 9.425861305812081e-07,
"logits": -1.28606116771698,
"logps": -83.92695617675781,
"loss": 0.5256,
"objective": 0.4972154200077057,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5083333253860474,
"regularize": 0.4972154200077057,
"step": 210
},
{
"dpo_loss": 0.6411459445953369,
"epoch": 1.2187057156353331,
"grad_norm": 117.64538520858981,
"learning_rate": 9.378855365814557e-07,
"logits": -1.2710984945297241,
"logps": -83.3528823852539,
"loss": 0.5386,
"objective": 0.6103346943855286,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5041666626930237,
"regularize": 0.6103346943855286,
"step": 215
},
{
"dpo_loss": 0.6046126484870911,
"epoch": 1.2470477090222012,
"grad_norm": 112.2724627593423,
"learning_rate": 9.330127018922193e-07,
"logits": -1.22818124294281,
"logps": -82.01517486572266,
"loss": 0.5413,
"objective": 0.5305136442184448,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.6083333492279053,
"regularize": 0.5305136442184448,
"step": 220
},
{
"dpo_loss": 0.5910329222679138,
"epoch": 1.2753897024090695,
"grad_norm": 114.94988107695234,
"learning_rate": 9.279695432253708e-07,
"logits": -1.3015425205230713,
"logps": -83.29090881347656,
"loss": 0.5262,
"objective": 0.528804361820221,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 0.528804361820221,
"step": 225
},
{
"dpo_loss": 0.5783368945121765,
"epoch": 1.3037316957959377,
"grad_norm": 112.00444337751402,
"learning_rate": 9.227580442891021e-07,
"logits": -1.2694045305252075,
"logps": -82.34114837646484,
"loss": 0.5259,
"objective": 0.5298904180526733,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5208333134651184,
"regularize": 0.5298903584480286,
"step": 230
},
{
"dpo_loss": 0.559749186038971,
"epoch": 1.3320736891828058,
"grad_norm": 117.1742133311728,
"learning_rate": 9.173802550076401e-07,
"logits": -1.3496894836425781,
"logps": -80.74533081054688,
"loss": 0.5275,
"objective": 0.5420801639556885,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.512499988079071,
"regularize": 0.5420801639556885,
"step": 235
},
{
"dpo_loss": 0.5604056715965271,
"epoch": 1.360415682569674,
"grad_norm": 120.20610234512412,
"learning_rate": 9.118382907149163e-07,
"logits": -1.2856090068817139,
"logps": -81.93352508544922,
"loss": 0.5307,
"objective": 0.4905354678630829,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.4905354678630829,
"step": 240
},
{
"dpo_loss": 0.5622699856758118,
"epoch": 1.3887576759565423,
"grad_norm": 112.51743847797803,
"learning_rate": 9.061343313225087e-07,
"logits": -1.3067680597305298,
"logps": -81.71417999267578,
"loss": 0.4769,
"objective": 0.4980693757534027,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 0.4980693757534027,
"step": 245
},
{
"dpo_loss": 0.5571755766868591,
"epoch": 1.4170996693434104,
"grad_norm": 105.16402218584498,
"learning_rate": 9.002706204621802e-07,
"logits": -1.2600089311599731,
"logps": -81.47142028808594,
"loss": 0.5045,
"objective": 0.48103129863739014,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.48750001192092896,
"regularize": 0.48103129863739014,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7336850166320801,
"eval_logits": -1.319351077079773,
"eval_logps": -88.7256088256836,
"eval_loss": 0.6191251873970032,
"eval_objective": 0.6396905779838562,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5175619721412659,
"eval_regularize": 0.6396905779838562,
"eval_runtime": 261.8978,
"eval_samples_per_second": 22.108,
"eval_steps_per_second": 0.924,
"step": 250
},
{
"dpo_loss": 0.5806334614753723,
"epoch": 1.4454416627302786,
"grad_norm": 108.9060179401822,
"learning_rate": 8.942494646033554e-07,
"logits": -1.268131971359253,
"logps": -82.72068786621094,
"loss": 0.5226,
"objective": 0.5074787735939026,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5625,
"regularize": 0.5074787735939026,
"step": 255
},
{
"dpo_loss": 0.6120874285697937,
"epoch": 1.473783656117147,
"grad_norm": 113.8812403973121,
"learning_rate": 8.880732321458784e-07,
"logits": -1.3066486120224,
"logps": -84.5510482788086,
"loss": 0.5203,
"objective": 0.5250194072723389,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5041666626930237,
"regularize": 0.5250194072723389,
"step": 260
},
{
"dpo_loss": 0.5722587704658508,
"epoch": 1.5021256495040152,
"grad_norm": 107.47217195176817,
"learning_rate": 8.817443524884117e-07,
"logits": -1.302085280418396,
"logps": -82.64102935791016,
"loss": 0.5125,
"objective": 0.5349072813987732,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5375000238418579,
"regularize": 0.5349072813987732,
"step": 265
},
{
"dpo_loss": 0.5912469625473022,
"epoch": 1.5304676428908834,
"grad_norm": 102.61360328866256,
"learning_rate": 8.752653150728411e-07,
"logits": -1.33375883102417,
"logps": -83.93022918701172,
"loss": 0.4989,
"objective": 0.5215144753456116,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.5215144753456116,
"step": 270
},
{
"dpo_loss": 0.5620048642158508,
"epoch": 1.5588096362777515,
"grad_norm": 120.75621294263999,
"learning_rate": 8.68638668405062e-07,
"logits": -1.346874713897705,
"logps": -83.51717376708984,
"loss": 0.508,
"objective": 0.49223792552948,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5416666865348816,
"regularize": 0.49223792552948,
"step": 275
},
{
"dpo_loss": 0.5491313934326172,
"epoch": 1.5871516296646198,
"grad_norm": 111.38123469249379,
"learning_rate": 8.61867019052535e-07,
"logits": -1.2457977533340454,
"logps": -82.07854461669922,
"loss": 0.4912,
"objective": 0.4965251088142395,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5249999761581421,
"regularize": 0.4965251088142395,
"step": 280
},
{
"dpo_loss": 0.597549319267273,
"epoch": 1.615493623051488,
"grad_norm": 108.78031876782775,
"learning_rate": 8.549530306190014e-07,
"logits": -1.3423717021942139,
"logps": -83.64887237548828,
"loss": 0.4878,
"objective": 0.5166993141174316,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 0.5166993141174316,
"step": 285
},
{
"dpo_loss": 0.5523730516433716,
"epoch": 1.643835616438356,
"grad_norm": 101.86340958175018,
"learning_rate": 8.478994226967638e-07,
"logits": -1.3935832977294922,
"logps": -81.38288116455078,
"loss": 0.4753,
"objective": 0.45616135001182556,
"ranking_idealized": 0.5708333253860474,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5791666507720947,
"regularize": 0.45616135001182556,
"step": 290
},
{
"dpo_loss": 0.5671105980873108,
"epoch": 1.6721776098252243,
"grad_norm": 97.05334097299942,
"learning_rate": 8.407089697969456e-07,
"logits": -1.3089579343795776,
"logps": -79.79814910888672,
"loss": 0.4806,
"objective": 0.48036739230155945,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5041666626930237,
"regularize": 0.48036739230155945,
"step": 295
},
{
"dpo_loss": 0.5690053701400757,
"epoch": 1.7005196032120926,
"grad_norm": 103.94959921577468,
"learning_rate": 8.333845002581458e-07,
"logits": -1.3550407886505127,
"logps": -81.06673431396484,
"loss": 0.5191,
"objective": 0.5282995700836182,
"ranking_idealized": 0.5916666388511658,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6041666865348816,
"regularize": 0.5282995700836182,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.7341631650924683,
"eval_logits": -1.3167152404785156,
"eval_logps": -87.61885833740234,
"eval_loss": 0.6501524448394775,
"eval_objective": 0.6544150114059448,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5165289044380188,
"eval_regularize": 0.6544150114059448,
"eval_runtime": 258.5096,
"eval_samples_per_second": 22.398,
"eval_steps_per_second": 0.936,
"step": 300
},
{
"dpo_loss": 0.5860777497291565,
"epoch": 1.7288615965989607,
"grad_norm": 101.19563914464877,
"learning_rate": 8.259288951339232e-07,
"logits": -1.290361762046814,
"logps": -81.49486541748047,
"loss": 0.4715,
"objective": 0.5236480236053467,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5333333611488342,
"regularize": 0.5236480236053467,
"step": 305
},
{
"dpo_loss": 0.5874544382095337,
"epoch": 1.7572035899858292,
"grad_norm": 101.32381454333631,
"learning_rate": 8.183450870595441e-07,
"logits": -1.323080062866211,
"logps": -79.28206634521484,
"loss": 0.4743,
"objective": 0.4995422959327698,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.4995422959327698,
"step": 310
},
{
"dpo_loss": 0.5733292698860168,
"epoch": 1.7855455833726972,
"grad_norm": 101.22251777355339,
"learning_rate": 8.106360590984404e-07,
"logits": -1.2535948753356934,
"logps": -80.41255950927734,
"loss": 0.4496,
"objective": 0.44809386134147644,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 0.44809386134147644,
"step": 315
},
{
"dpo_loss": 0.5748218297958374,
"epoch": 1.8138875767595655,
"grad_norm": 102.63800844635435,
"learning_rate": 8.028048435688333e-07,
"logits": -1.3195849657058716,
"logps": -82.20491027832031,
"loss": 0.464,
"objective": 0.4412696659564972,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5249999761581421,
"regularize": 0.4412696659564972,
"step": 320
},
{
"dpo_loss": 0.5583847761154175,
"epoch": 1.8422295701464337,
"grad_norm": 106.29080888058539,
"learning_rate": 7.948545208509811e-07,
"logits": -1.3646790981292725,
"logps": -83.66368865966797,
"loss": 0.4757,
"objective": 0.43752506375312805,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 0.43752506375312805,
"step": 325
},
{
"dpo_loss": 0.5669862627983093,
"epoch": 1.8705715635333018,
"grad_norm": 114.61344867316569,
"learning_rate": 7.86788218175523e-07,
"logits": -1.1525540351867676,
"logps": -81.54851531982422,
"loss": 0.4342,
"objective": 0.4504217505455017,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.48750001192092896,
"regularize": 0.4504217505455017,
"step": 330
},
{
"dpo_loss": 0.5577248334884644,
"epoch": 1.89891355692017,
"grad_norm": 107.68338896171758,
"learning_rate": 7.786091083933949e-07,
"logits": -1.3191189765930176,
"logps": -82.01612091064453,
"loss": 0.4554,
"objective": 0.4512566030025482,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5208333134651184,
"regularize": 0.4512566030025482,
"step": 335
},
{
"dpo_loss": 0.5417340993881226,
"epoch": 1.9272555503070383,
"grad_norm": 113.63225902754026,
"learning_rate": 7.703204087277988e-07,
"logits": -1.311868667602539,
"logps": -84.563720703125,
"loss": 0.4472,
"objective": 0.4667154550552368,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5666666626930237,
"regularize": 0.46671542525291443,
"step": 340
},
{
"dpo_loss": 0.577439546585083,
"epoch": 1.9555975436939064,
"grad_norm": 105.47346696011448,
"learning_rate": 7.619253795087208e-07,
"logits": -1.309786081314087,
"logps": -82.33320617675781,
"loss": 0.4317,
"objective": 0.45227622985839844,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5416666865348816,
"regularize": 0.45227622985839844,
"step": 345
},
{
"dpo_loss": 0.5885747671127319,
"epoch": 1.9839395370807746,
"grad_norm": 109.05004528153341,
"learning_rate": 7.534273228904915e-07,
"logits": -1.22944974899292,
"logps": -83.1298599243164,
"loss": 0.4473,
"objective": 0.4539943039417267,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4539943039417267,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.7476473450660706,
"eval_logits": -1.3002201318740845,
"eval_logps": -88.6204833984375,
"eval_loss": 0.6869083642959595,
"eval_objective": 0.6877632737159729,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 0.6877632737159729,
"eval_runtime": 258.9539,
"eval_samples_per_second": 22.359,
"eval_steps_per_second": 0.935,
"step": 350
},
{
"dpo_loss": 0.5712209343910217,
"epoch": 2.012281530467643,
"grad_norm": 101.66242864172723,
"learning_rate": 7.448295815528956e-07,
"logits": -1.2166054248809814,
"logps": -82.02034759521484,
"loss": 0.4247,
"objective": 0.4026139974594116,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4026139974594116,
"step": 355
},
{
"dpo_loss": 0.5501471161842346,
"epoch": 2.040623523854511,
"grad_norm": 111.09255218939096,
"learning_rate": 7.361355373863413e-07,
"logits": -1.2762349843978882,
"logps": -80.2708969116211,
"loss": 0.4188,
"objective": 0.3990139663219452,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5541666746139526,
"regularize": 0.3990139663219452,
"step": 360
},
{
"dpo_loss": 0.5589497089385986,
"epoch": 2.0689655172413794,
"grad_norm": 102.40571901595573,
"learning_rate": 7.273486101616056e-07,
"logits": -1.3285800218582153,
"logps": -82.2364730834961,
"loss": 0.4468,
"objective": 0.4508870840072632,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5333333611488342,
"regularize": 0.4508870840072632,
"step": 365
},
{
"dpo_loss": 0.5617626309394836,
"epoch": 2.0973075106282475,
"grad_norm": 97.76405475943756,
"learning_rate": 7.184722561846797e-07,
"logits": -1.2896106243133545,
"logps": -79.0242691040039,
"loss": 0.4108,
"objective": 0.4170443117618561,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.4170443117618561,
"step": 370
},
{
"dpo_loss": 0.5481951236724854,
"epoch": 2.1256495040151155,
"grad_norm": 102.54464966495118,
"learning_rate": 7.095099669372443e-07,
"logits": -1.2680145502090454,
"logps": -80.49474334716797,
"loss": 0.4088,
"objective": 0.3847118020057678,
"ranking_idealized": 0.625,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.612500011920929,
"regularize": 0.3847118020057678,
"step": 375
},
{
"dpo_loss": 0.5886502861976624,
"epoch": 2.153991497401984,
"grad_norm": 99.36933926359579,
"learning_rate": 7.004652677033068e-07,
"logits": -1.2002967596054077,
"logps": -79.81450653076172,
"loss": 0.4196,
"objective": 0.43431776762008667,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5416666865348816,
"regularize": 0.43431776762008667,
"step": 380
},
{
"dpo_loss": 0.566298246383667,
"epoch": 2.182333490788852,
"grad_norm": 100.41990113674308,
"learning_rate": 6.913417161825449e-07,
"logits": -1.307159423828125,
"logps": -81.99685668945312,
"loss": 0.4125,
"objective": 0.4449020326137543,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5375000238418579,
"regularize": 0.4449020326137543,
"step": 385
},
{
"dpo_loss": 0.5851306319236755,
"epoch": 2.21067548417572,
"grad_norm": 101.58148277220718,
"learning_rate": 6.821429010908971e-07,
"logits": -1.2254120111465454,
"logps": -80.0687484741211,
"loss": 0.4184,
"objective": 0.41351255774497986,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.4791666567325592,
"regularize": 0.41351255774497986,
"step": 390
},
{
"dpo_loss": 0.5543534159660339,
"epoch": 2.2390174775625886,
"grad_norm": 102.754307701718,
"learning_rate": 6.728724407489553e-07,
"logits": -1.2246791124343872,
"logps": -80.76174926757812,
"loss": 0.394,
"objective": 0.3953525424003601,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 0.3953525424003601,
"step": 395
},
{
"dpo_loss": 0.5204497575759888,
"epoch": 2.2673594709494567,
"grad_norm": 105.48313372380032,
"learning_rate": 6.635339816587108e-07,
"logits": -1.2704825401306152,
"logps": -82.68233489990234,
"loss": 0.3926,
"objective": 0.4119986891746521,
"ranking_idealized": 0.6000000238418579,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.4119986891746521,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.7518758773803711,
"eval_logits": -1.273954153060913,
"eval_logps": -87.79329681396484,
"eval_loss": 0.708670973777771,
"eval_objective": 0.7146722078323364,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 0.7146722078323364,
"eval_runtime": 259.1203,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 0.934,
"step": 400
},
{
"dpo_loss": 0.5729255676269531,
"epoch": 2.295701464336325,
"grad_norm": 97.83556162175466,
"learning_rate": 6.541311970692162e-07,
"logits": -1.3065953254699707,
"logps": -81.27420043945312,
"loss": 0.388,
"objective": 0.3636757433414459,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.574999988079071,
"regularize": 0.3636757433414459,
"step": 405
},
{
"dpo_loss": 0.5597470998764038,
"epoch": 2.324043457723193,
"grad_norm": 103.83650251762533,
"learning_rate": 6.446677855317264e-07,
"logits": -1.1975177526474,
"logps": -80.72605895996094,
"loss": 0.3899,
"objective": 0.39117977023124695,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.4958333373069763,
"regularize": 0.39117977023124695,
"step": 410
},
{
"dpo_loss": 0.5665189027786255,
"epoch": 2.3523854511100613,
"grad_norm": 98.79350448658165,
"learning_rate": 6.351474694448864e-07,
"logits": -1.227319598197937,
"logps": -81.35487365722656,
"loss": 0.3741,
"objective": 0.38428404927253723,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5541666746139526,
"regularize": 0.38428401947021484,
"step": 415
},
{
"dpo_loss": 0.5536468029022217,
"epoch": 2.3807274444969297,
"grad_norm": 107.78716307916815,
"learning_rate": 6.255739935905395e-07,
"logits": -1.1353336572647095,
"logps": -83.33781433105469,
"loss": 0.3686,
"objective": 0.3524840176105499,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4833333194255829,
"regularize": 0.3524840176105499,
"step": 420
},
{
"dpo_loss": 0.5507148504257202,
"epoch": 2.409069437883798,
"grad_norm": 95.31420015325158,
"learning_rate": 6.159511236607315e-07,
"logits": -1.186567783355713,
"logps": -81.95353698730469,
"loss": 0.3829,
"objective": 0.3914056718349457,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5166666507720947,
"regularize": 0.3914056718349457,
"step": 425
},
{
"dpo_loss": 0.5727216601371765,
"epoch": 2.4374114312706663,
"grad_norm": 113.51215777470752,
"learning_rate": 6.062826447764883e-07,
"logits": -1.2177358865737915,
"logps": -82.20878601074219,
"loss": 0.3667,
"objective": 0.38485896587371826,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.550000011920929,
"regularize": 0.38485896587371826,
"step": 430
},
{
"dpo_loss": 0.5795385837554932,
"epoch": 2.4657534246575343,
"grad_norm": 109.86296645766184,
"learning_rate": 5.965723599989528e-07,
"logits": -1.2774423360824585,
"logps": -81.68889617919922,
"loss": 0.3742,
"objective": 0.373045951128006,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5208333134651184,
"regularize": 0.373045951128006,
"step": 435
},
{
"dpo_loss": 0.5365008115768433,
"epoch": 2.4940954180444024,
"grad_norm": 111.81192989308617,
"learning_rate": 5.868240888334652e-07,
"logits": -1.2080743312835693,
"logps": -81.38243865966797,
"loss": 0.3754,
"objective": 0.3666243851184845,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.3666243851184845,
"step": 440
},
{
"dpo_loss": 0.5457199811935425,
"epoch": 2.5224374114312704,
"grad_norm": 111.8881514776807,
"learning_rate": 5.770416657271728e-07,
"logits": -1.220347285270691,
"logps": -79.05127716064453,
"loss": 0.3504,
"objective": 0.3269334137439728,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5375000238418579,
"regularize": 0.3269334137439728,
"step": 445
},
{
"dpo_loss": 0.5769312977790833,
"epoch": 2.550779404818139,
"grad_norm": 99.97534603124807,
"learning_rate": 5.67228938560766e-07,
"logits": -1.2545174360275269,
"logps": -81.12376403808594,
"loss": 0.3583,
"objective": 0.3686024844646454,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.512499988079071,
"regularize": 0.3686024844646454,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.7467784881591797,
"eval_logits": -1.2637890577316284,
"eval_logps": -87.71803283691406,
"eval_loss": 0.6997210383415222,
"eval_objective": 0.7073113918304443,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5092975497245789,
"eval_regularize": 0.7073113918304443,
"eval_runtime": 259.5146,
"eval_samples_per_second": 22.311,
"eval_steps_per_second": 0.933,
"step": 450
},
{
"dpo_loss": 0.5546112656593323,
"epoch": 2.579121398205007,
"grad_norm": 103.50827450574826,
"learning_rate": 5.573897671349268e-07,
"logits": -1.282201886177063,
"logps": -82.07099151611328,
"loss": 0.3709,
"objective": 0.3527103662490845,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.512499988079071,
"regularize": 0.3527103662490845,
"step": 455
},
{
"dpo_loss": 0.5413371920585632,
"epoch": 2.6074633915918755,
"grad_norm": 103.11622894521913,
"learning_rate": 5.475280216520912e-07,
"logits": -1.3678227663040161,
"logps": -82.53510284423828,
"loss": 0.3493,
"objective": 0.3583069443702698,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5249999761581421,
"regularize": 0.3583069443702698,
"step": 460
},
{
"dpo_loss": 0.5440939664840698,
"epoch": 2.6358053849787435,
"grad_norm": 101.71475758586865,
"learning_rate": 5.376475811941191e-07,
"logits": -1.2582870721817017,
"logps": -81.93384552001953,
"loss": 0.3418,
"objective": 0.3399708867073059,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.512499988079071,
"regularize": 0.3399708867073059,
"step": 465
},
{
"dpo_loss": 0.5473323464393616,
"epoch": 2.6641473783656116,
"grad_norm": 106.63182084368584,
"learning_rate": 5.277523321964701e-07,
"logits": -1.266352891921997,
"logps": -80.5593490600586,
"loss": 0.3361,
"objective": 0.3204086124897003,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3204086124897003,
"step": 470
},
{
"dpo_loss": 0.5676419734954834,
"epoch": 2.69248937175248,
"grad_norm": 100.07291475508246,
"learning_rate": 5.178461669194903e-07,
"logits": -1.2326582670211792,
"logps": -81.09722137451172,
"loss": 0.3221,
"objective": 0.3266924321651459,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5083333253860474,
"regularize": 0.3266924321651459,
"step": 475
},
{
"dpo_loss": 0.5700205564498901,
"epoch": 2.720831365139348,
"grad_norm": 114.43771495362513,
"learning_rate": 5.07932981917404e-07,
"logits": -1.3194983005523682,
"logps": -80.66010284423828,
"loss": 0.3398,
"objective": 0.3393530547618866,
"ranking_idealized": 0.5249999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5166666507720947,
"regularize": 0.3393530547618866,
"step": 480
},
{
"dpo_loss": 0.540759801864624,
"epoch": 2.7491733585262166,
"grad_norm": 103.16997454398687,
"learning_rate": 4.980166765056193e-07,
"logits": -1.254343032836914,
"logps": -82.84033203125,
"loss": 0.3085,
"objective": 0.3383033871650696,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5333333611488342,
"regularize": 0.3383033871650696,
"step": 485
},
{
"dpo_loss": 0.5252385139465332,
"epoch": 2.7775153519130846,
"grad_norm": 106.99635988259067,
"learning_rate": 4.881011512269463e-07,
"logits": -1.227257251739502,
"logps": -80.78081512451172,
"loss": 0.3134,
"objective": 0.3047190010547638,
"ranking_idealized": 0.5041666626930237,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.4958333373069763,
"regularize": 0.3047190010547638,
"step": 490
},
{
"dpo_loss": 0.5381739139556885,
"epoch": 2.8058573452999527,
"grad_norm": 96.94469590251771,
"learning_rate": 4.78190306317332e-07,
"logits": -1.2355995178222656,
"logps": -80.06063842773438,
"loss": 0.3118,
"objective": 0.31756675243377686,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4958333373069763,
"regularize": 0.31756675243377686,
"step": 495
},
{
"dpo_loss": 0.537841796875,
"epoch": 2.8341993386868207,
"grad_norm": 101.86509864093333,
"learning_rate": 4.682880401717177e-07,
"logits": -1.2800052165985107,
"logps": -79.0132064819336,
"loss": 0.2969,
"objective": 0.2936791479587555,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5416666865348816,
"regularize": 0.2936791479587555,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.7569719552993774,
"eval_logits": -1.2819868326187134,
"eval_logps": -87.59934997558594,
"eval_loss": 0.7206214070320129,
"eval_objective": 0.7300238609313965,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 0.7300238609313965,
"eval_runtime": 259.8973,
"eval_samples_per_second": 22.278,
"eval_steps_per_second": 0.931,
"step": 500
},
{
"dpo_loss": 0.5372523069381714,
"epoch": 2.862541332073689,
"grad_norm": 99.30099425250114,
"learning_rate": 4.5839824781061886e-07,
"logits": -1.239542841911316,
"logps": -80.23788452148438,
"loss": 0.2969,
"objective": 0.2953982949256897,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5458333492279053,
"regularize": 0.2953982949256897,
"step": 505
},
{
"dpo_loss": 0.551425576210022,
"epoch": 2.8908833254605573,
"grad_norm": 97.19792911916865,
"learning_rate": 4.4852481934803277e-07,
"logits": -1.1939424276351929,
"logps": -80.47624206542969,
"loss": 0.2884,
"objective": 0.2630988359451294,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.550000011920929,
"regularize": 0.2630988359451294,
"step": 510
},
{
"dpo_loss": 0.553286612033844,
"epoch": 2.9192253188474258,
"grad_norm": 105.92002880687674,
"learning_rate": 4.3867163846127674e-07,
"logits": -1.3426212072372437,
"logps": -80.19091796875,
"loss": 0.2858,
"objective": 0.2995811402797699,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5,
"regularize": 0.2995811104774475,
"step": 515
},
{
"dpo_loss": 0.5569177269935608,
"epoch": 2.947567312234294,
"grad_norm": 101.90420898944761,
"learning_rate": 4.2884258086335745e-07,
"logits": -1.2548803091049194,
"logps": -82.30675506591797,
"loss": 0.2885,
"objective": 0.2949267625808716,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5291666388511658,
"regularize": 0.2949267625808716,
"step": 520
},
{
"dpo_loss": 0.5335093140602112,
"epoch": 2.975909305621162,
"grad_norm": 118.61355678596335,
"learning_rate": 4.1904151277847305e-07,
"logits": -1.2422376871109009,
"logps": -81.52430725097656,
"loss": 0.2939,
"objective": 0.277982234954834,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5874999761581421,
"regularize": 0.277982234954834,
"step": 525
},
{
"dpo_loss": 0.5339943766593933,
"epoch": 3.0042512990080303,
"grad_norm": 95.3080580242307,
"learning_rate": 4.092722894212487e-07,
"logits": -1.245071530342102,
"logps": -80.53169250488281,
"loss": 0.2807,
"objective": 0.28686758875846863,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5249999761581421,
"regularize": 0.28686758875846863,
"step": 530
},
{
"dpo_loss": 0.5502054691314697,
"epoch": 3.0325932923948984,
"grad_norm": 102.03710421357799,
"learning_rate": 3.995387534803005e-07,
"logits": -1.2770276069641113,
"logps": -82.10601043701172,
"loss": 0.2686,
"objective": 0.2612551748752594,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5541666746139526,
"regularize": 0.2612551748752594,
"step": 535
},
{
"dpo_loss": 0.5417371392250061,
"epoch": 3.0609352857817664,
"grad_norm": 93.88545782890976,
"learning_rate": 3.8984473360672967e-07,
"logits": -1.3437141180038452,
"logps": -80.88935089111328,
"loss": 0.2546,
"objective": 0.2543182373046875,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5416666865348816,
"regularize": 0.2543182373046875,
"step": 540
},
{
"dpo_loss": 0.5468733906745911,
"epoch": 3.089277279168635,
"grad_norm": 104.98103692622652,
"learning_rate": 3.801940429081345e-07,
"logits": -1.29421067237854,
"logps": -81.87580108642578,
"loss": 0.2483,
"objective": 0.23621630668640137,
"ranking_idealized": 0.5,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.49166667461395264,
"regularize": 0.23621630668640137,
"step": 545
},
{
"dpo_loss": 0.5142976641654968,
"epoch": 3.117619272555503,
"grad_norm": 98.44480374148442,
"learning_rate": 3.7059047744873955e-07,
"logits": -1.1407238245010376,
"logps": -82.03406524658203,
"loss": 0.2456,
"objective": 0.23313947021961212,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5666666626930237,
"regularize": 0.23313947021961212,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.7502080202102661,
"eval_logits": -1.2747371196746826,
"eval_logps": -87.48565673828125,
"eval_loss": 0.7081578969955444,
"eval_objective": 0.7094937562942505,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5134297609329224,
"eval_regularize": 0.7094937562942505,
"eval_runtime": 259.3716,
"eval_samples_per_second": 22.323,
"eval_steps_per_second": 0.933,
"step": 550
},
{
"dpo_loss": 0.519844651222229,
"epoch": 3.1459612659423715,
"grad_norm": 110.59601884737206,
"learning_rate": 3.6103781475622786e-07,
"logits": -1.2129274606704712,
"logps": -82.34542083740234,
"loss": 0.2514,
"objective": 0.24437649548053741,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5791666507720947,
"regularize": 0.24437648057937622,
"step": 555
},
{
"dpo_loss": 0.5489280223846436,
"epoch": 3.1743032593292395,
"grad_norm": 96.14728671233563,
"learning_rate": 3.5153981233586274e-07,
"logits": -1.213472843170166,
"logps": -79.25808715820312,
"loss": 0.2372,
"objective": 0.2192198634147644,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5166666507720947,
"regularize": 0.2192198634147644,
"step": 560
},
{
"dpo_loss": 0.537466287612915,
"epoch": 3.2026452527161076,
"grad_norm": 107.72259319859731,
"learning_rate": 3.421002061924876e-07,
"logits": -1.2699893712997437,
"logps": -80.88977813720703,
"loss": 0.2311,
"objective": 0.22347129881381989,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5625,
"regularize": 0.22347129881381989,
"step": 565
},
{
"dpo_loss": 0.5202825665473938,
"epoch": 3.230987246102976,
"grad_norm": 96.26968527459513,
"learning_rate": 3.327227093609824e-07,
"logits": -1.1865532398223877,
"logps": -80.67752075195312,
"loss": 0.2252,
"objective": 0.21609430015087128,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5,
"regularize": 0.21609430015087128,
"step": 570
},
{
"dpo_loss": 0.5354868769645691,
"epoch": 3.259329239489844,
"grad_norm": 107.37637317824311,
"learning_rate": 3.234110104457536e-07,
"logits": -1.2506965398788452,
"logps": -81.32064056396484,
"loss": 0.2293,
"objective": 0.2312368005514145,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5375000238418579,
"regularize": 0.2312368005514145,
"step": 575
},
{
"dpo_loss": 0.5328246355056763,
"epoch": 3.287671232876712,
"grad_norm": 100.4320056036311,
"learning_rate": 3.141687721698363e-07,
"logits": -1.2623833417892456,
"logps": -82.67001342773438,
"loss": 0.2288,
"objective": 0.22114989161491394,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.574999988079071,
"regularize": 0.22114989161491394,
"step": 580
},
{
"dpo_loss": 0.5769094228744507,
"epoch": 3.3160132262635806,
"grad_norm": 95.72812686493451,
"learning_rate": 3.049996299341742e-07,
"logits": -1.279525876045227,
"logps": -83.05570220947266,
"loss": 0.2225,
"objective": 0.2485423982143402,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5458333492279053,
"regularize": 0.2485423982143402,
"step": 585
},
{
"dpo_loss": 0.5602288842201233,
"epoch": 3.3443552196504487,
"grad_norm": 114.56713956727607,
"learning_rate": 2.959071903876486e-07,
"logits": -1.294264554977417,
"logps": -83.3205795288086,
"loss": 0.2291,
"objective": 0.22891105711460114,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5166666507720947,
"regularize": 0.22891105711460114,
"step": 590
},
{
"dpo_loss": 0.5364168286323547,
"epoch": 3.372697213037317,
"grad_norm": 110.16666826508619,
"learning_rate": 2.86895030008416e-07,
"logits": -1.2113687992095947,
"logps": -82.52010345458984,
"loss": 0.2129,
"objective": 0.22527165710926056,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.22527165710926056,
"step": 595
},
{
"dpo_loss": 0.5123292803764343,
"epoch": 3.4010392064241852,
"grad_norm": 102.02441515454663,
"learning_rate": 2.779666936971129e-07,
"logits": -1.2210050821304321,
"logps": -81.7782974243164,
"loss": 0.2121,
"objective": 0.22540892660617828,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.22540892660617828,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.7512553930282593,
"eval_logits": -1.2610689401626587,
"eval_logps": -87.82508087158203,
"eval_loss": 0.7149726152420044,
"eval_objective": 0.7195205688476562,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 0.7195205688476562,
"eval_runtime": 260.141,
"eval_samples_per_second": 22.257,
"eval_steps_per_second": 0.93,
"step": 600
},
{
"dpo_loss": 0.5458318591117859,
"epoch": 3.4293811998110533,
"grad_norm": 109.5352185195456,
"learning_rate": 2.6912569338248315e-07,
"logits": -1.2689701318740845,
"logps": -83.29906463623047,
"loss": 0.2091,
"objective": 0.18265347182750702,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.550000011920929,
"regularize": 0.18265347182750702,
"step": 605
},
{
"dpo_loss": 0.5613773465156555,
"epoch": 3.4577231931979218,
"grad_norm": 98.96988548944374,
"learning_rate": 2.603755066399718e-07,
"logits": -1.240617036819458,
"logps": -81.78998565673828,
"loss": 0.208,
"objective": 0.20637358725070953,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5291666388511658,
"regularize": 0.20637358725070953,
"step": 610
},
{
"dpo_loss": 0.5179942846298218,
"epoch": 3.48606518658479,
"grad_norm": 101.26514646147335,
"learning_rate": 2.517195753238345e-07,
"logits": -1.2508882284164429,
"logps": -81.7972183227539,
"loss": 0.1973,
"objective": 0.19295792281627655,
"ranking_idealized": 0.4749999940395355,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.47083333134651184,
"regularize": 0.19295792281627655,
"step": 615
},
{
"dpo_loss": 0.49570992588996887,
"epoch": 3.514407179971658,
"grad_norm": 94.4850227020022,
"learning_rate": 2.4316130421329696e-07,
"logits": -1.1392742395401,
"logps": -81.18209838867188,
"loss": 0.1959,
"objective": 0.19464272260665894,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.512499988079071,
"regularize": 0.19464272260665894,
"step": 620
},
{
"dpo_loss": 0.5331357717514038,
"epoch": 3.5427491733585263,
"grad_norm": 102.01306480785293,
"learning_rate": 2.3470405967329604e-07,
"logits": -1.2014575004577637,
"logps": -80.8895492553711,
"loss": 0.189,
"objective": 0.20246191322803497,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5291666388511658,
"regularize": 0.20246191322803497,
"step": 625
},
{
"dpo_loss": 0.5293785929679871,
"epoch": 3.5710911667453944,
"grad_norm": 104.25371252839783,
"learning_rate": 2.2635116833033392e-07,
"logits": -1.28461754322052,
"logps": -80.65286254882812,
"loss": 0.1914,
"objective": 0.1838250458240509,
"ranking_idealized": 0.5291666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5333333611488342,
"regularize": 0.1838250458240509,
"step": 630
},
{
"dpo_loss": 0.5447375178337097,
"epoch": 3.5994331601322624,
"grad_norm": 98.62302663391017,
"learning_rate": 2.181059157639598e-07,
"logits": -1.2204653024673462,
"logps": -81.04381561279297,
"loss": 0.1798,
"objective": 0.1847100853919983,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.49166667461395264,
"regularize": 0.1847100704908371,
"step": 635
},
{
"dpo_loss": 0.5025536417961121,
"epoch": 3.627775153519131,
"grad_norm": 98.21161327414355,
"learning_rate": 2.0997154521440097e-07,
"logits": -1.178464412689209,
"logps": -81.83779907226562,
"loss": 0.1761,
"objective": 0.18075956404209137,
"ranking_idealized": 0.4958333373069763,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5,
"regularize": 0.18075956404209137,
"step": 640
},
{
"dpo_loss": 0.5238084197044373,
"epoch": 3.656117146905999,
"grad_norm": 96.02562335204865,
"learning_rate": 2.0195125630684428e-07,
"logits": -1.2101988792419434,
"logps": -80.41759490966797,
"loss": 0.1754,
"objective": 0.17556659877300262,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.17556659877300262,
"step": 645
},
{
"dpo_loss": 0.5436745882034302,
"epoch": 3.6844591402928675,
"grad_norm": 99.15450590850473,
"learning_rate": 1.9404820379287672e-07,
"logits": -1.2024494409561157,
"logps": -81.63809967041016,
"loss": 0.1721,
"objective": 0.15629148483276367,
"ranking_idealized": 0.5166666507720947,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.15629148483276367,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.7529332041740417,
"eval_logits": -1.2667409181594849,
"eval_logps": -87.55421447753906,
"eval_loss": 0.7180734276771545,
"eval_objective": 0.7210157513618469,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 0.7210157513618469,
"eval_runtime": 258.8884,
"eval_samples_per_second": 22.365,
"eval_steps_per_second": 0.935,
"step": 650
},
{
"dpo_loss": 0.5238640904426575,
"epoch": 3.7128011336797355,
"grad_norm": 97.25223896173355,
"learning_rate": 1.8626549630957395e-07,
"logits": -1.240193486213684,
"logps": -80.53789520263672,
"loss": 0.1708,
"objective": 0.1773752123117447,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5416666865348816,
"regularize": 0.1773752123117447,
"step": 655
},
{
"dpo_loss": 0.5414367914199829,
"epoch": 3.7411431270666036,
"grad_norm": 98.36333988268215,
"learning_rate": 1.7860619515673032e-07,
"logits": -1.318145513534546,
"logps": -81.55570983886719,
"loss": 0.1704,
"objective": 0.1702161431312561,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5625,
"regularize": 0.1702161431312561,
"step": 660
},
{
"dpo_loss": 0.5241764187812805,
"epoch": 3.769485120453472,
"grad_norm": 104.11429458509453,
"learning_rate": 1.7107331309270684e-07,
"logits": -1.2156672477722168,
"logps": -81.38536071777344,
"loss": 0.1559,
"objective": 0.16640320420265198,
"ranking_idealized": 0.5208333134651184,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5208333134651184,
"regularize": 0.16640320420265198,
"step": 665
},
{
"dpo_loss": 0.5278924703598022,
"epoch": 3.79782711384034,
"grad_norm": 101.56797379943947,
"learning_rate": 1.6366981314937372e-07,
"logits": -1.321338415145874,
"logps": -81.34001922607422,
"loss": 0.1551,
"objective": 0.14367233216762543,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6041666865348816,
"regularize": 0.14367233216762543,
"step": 670
},
{
"dpo_loss": 0.529498279094696,
"epoch": 3.826169107227208,
"grad_norm": 93.64293062728967,
"learning_rate": 1.5639860746661338e-07,
"logits": -1.2852925062179565,
"logps": -80.37777709960938,
"loss": 0.1531,
"objective": 0.1431640088558197,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.574999988079071,
"regularize": 0.1431640088558197,
"step": 675
},
{
"dpo_loss": 0.5504888892173767,
"epoch": 3.8545111006140766,
"grad_norm": 101.91053202750965,
"learning_rate": 1.492625561468393e-07,
"logits": -1.2134761810302734,
"logps": -80.77507781982422,
"loss": 0.1495,
"objective": 0.15138006210327148,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 0.15138006210327148,
"step": 680
},
{
"dpo_loss": 0.5266403555870056,
"epoch": 3.8828530940009447,
"grad_norm": 105.20220630755058,
"learning_rate": 1.4226446612998671e-07,
"logits": -1.2806271314620972,
"logps": -80.39690399169922,
"loss": 0.1478,
"objective": 0.1402387022972107,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5416666865348816,
"regularize": 0.1402387022972107,
"step": 685
},
{
"dpo_loss": 0.5246676802635193,
"epoch": 3.9111950873878127,
"grad_norm": 93.05384336107711,
"learning_rate": 1.3540709008941147e-07,
"logits": -1.2790921926498413,
"logps": -80.16386413574219,
"loss": 0.1448,
"objective": 0.1356712281703949,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.574999988079071,
"regularize": 0.1356712281703949,
"step": 690
},
{
"dpo_loss": 0.5320358276367188,
"epoch": 3.9395370807746812,
"grad_norm": 92.75785969700081,
"learning_rate": 1.2869312534913685e-07,
"logits": -1.1912401914596558,
"logps": -81.54454803466797,
"loss": 0.1365,
"objective": 0.15173907577991486,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.4791666567325592,
"regularize": 0.15173907577991486,
"step": 695
},
{
"dpo_loss": 0.5335038304328918,
"epoch": 3.9678790741615493,
"grad_norm": 98.04635697056264,
"learning_rate": 1.2212521282287093e-07,
"logits": -1.2281270027160645,
"logps": -81.2812728881836,
"loss": 0.1386,
"objective": 0.15212775766849518,
"ranking_idealized": 0.49166667461395264,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.49166667461395264,
"regularize": 0.15212775766849518,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.7514122128486633,
"eval_logits": -1.26543128490448,
"eval_logps": -87.5438003540039,
"eval_loss": 0.7064864635467529,
"eval_objective": 0.7093826532363892,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 0.7093826532363892,
"eval_runtime": 259.4271,
"eval_samples_per_second": 22.318,
"eval_steps_per_second": 0.933,
"step": 700
},
{
"dpo_loss": 0.5151931047439575,
"epoch": 3.9962210675484178,
"grad_norm": 98.98779841672567,
"learning_rate": 1.15705935975212e-07,
"logits": -1.1545416116714478,
"logps": -79.99464416503906,
"loss": 0.133,
"objective": 0.12396544963121414,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5541666746139526,
"regularize": 0.12396544963121414,
"step": 705
},
{
"dpo_loss": 0.5161293745040894,
"epoch": 4.024563060935286,
"grad_norm": 94.60148232757011,
"learning_rate": 1.094378198054533e-07,
"logits": -1.2483078241348267,
"logps": -81.60172271728516,
"loss": 0.1163,
"objective": 0.09800054132938385,
"ranking_idealized": 0.48750001192092896,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.49166667461395264,
"regularize": 0.09800054132938385,
"step": 710
},
{
"dpo_loss": 0.5236179232597351,
"epoch": 4.052905054322154,
"grad_norm": 95.22257327163594,
"learning_rate": 1.0332332985438247e-07,
"logits": -1.2045139074325562,
"logps": -81.0995864868164,
"loss": 0.1109,
"objective": 0.10611793398857117,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5458333492279053,
"regularize": 0.10611793398857117,
"step": 715
},
{
"dpo_loss": 0.5168012976646423,
"epoch": 4.081247047709022,
"grad_norm": 101.20095806117061,
"learning_rate": 9.736487123447068e-08,
"logits": -1.1531497240066528,
"logps": -83.11766052246094,
"loss": 0.1094,
"objective": 0.11501701176166534,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5291666388511658,
"regularize": 0.11501700431108475,
"step": 720
},
{
"dpo_loss": 0.5381873250007629,
"epoch": 4.109589041095891,
"grad_norm": 95.89781184589388,
"learning_rate": 9.156478768383058e-08,
"logits": -1.226915955543518,
"logps": -81.01741790771484,
"loss": 0.1131,
"objective": 0.10321195423603058,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 0.10321194678544998,
"step": 725
},
{
"dpo_loss": 0.5230408310890198,
"epoch": 4.137931034482759,
"grad_norm": 100.59516155263596,
"learning_rate": 8.592536064431466e-08,
"logits": -1.2960838079452515,
"logps": -81.26818084716797,
"loss": 0.1063,
"objective": 0.09859620779752731,
"ranking_idealized": 0.550000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5541666746139526,
"regularize": 0.09859620034694672,
"step": 730
},
{
"dpo_loss": 0.5300396084785461,
"epoch": 4.166273027869627,
"grad_norm": 99.36395143789497,
"learning_rate": 8.044880836411888e-08,
"logits": -1.1851133108139038,
"logps": -82.21375274658203,
"loss": 0.1031,
"objective": 0.10836609452962875,
"ranking_idealized": 0.5541666746139526,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.10836609452962875,
"step": 735
},
{
"dpo_loss": 0.5081815719604492,
"epoch": 4.194615021256495,
"grad_norm": 97.85835254690582,
"learning_rate": 7.513728502524286e-08,
"logits": -1.214992880821228,
"logps": -81.85246276855469,
"loss": 0.1108,
"objective": 0.09990967810153961,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5375000238418579,
"regularize": 0.09990967065095901,
"step": 740
},
{
"dpo_loss": 0.5218914747238159,
"epoch": 4.222957014643363,
"grad_norm": 102.73912212411751,
"learning_rate": 6.999287989614971e-08,
"logits": -1.2808277606964111,
"logps": -79.33137512207031,
"loss": 0.1003,
"objective": 0.0994986817240715,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5041666626930237,
"regularize": 0.0994986817240715,
"step": 745
},
{
"dpo_loss": 0.5142425298690796,
"epoch": 4.251299008030231,
"grad_norm": 98.36808351404612,
"learning_rate": 6.501761650996052e-08,
"logits": -1.3703263998031616,
"logps": -81.33523559570312,
"loss": 0.0985,
"objective": 0.09714233130216599,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5416666865348816,
"regularize": 0.09714233130216599,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.7508513331413269,
"eval_logits": -1.2698993682861328,
"eval_logps": -87.64305877685547,
"eval_loss": 0.7095766663551331,
"eval_objective": 0.7117514610290527,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 0.7117514610290527,
"eval_runtime": 259.3031,
"eval_samples_per_second": 22.329,
"eval_steps_per_second": 0.933,
"step": 750
},
{
"dpo_loss": 0.5111901760101318,
"epoch": 4.2796410014171,
"grad_norm": 95.47883788378027,
"learning_rate": 6.021345186850418e-08,
"logits": -1.2960675954818726,
"logps": -81.29663848876953,
"loss": 0.0959,
"objective": 0.0885348990559578,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5458333492279053,
"regularize": 0.0885348990559578,
"step": 755
},
{
"dpo_loss": 0.5198584794998169,
"epoch": 4.307982994803968,
"grad_norm": 107.35986940095462,
"learning_rate": 5.5582275672538316e-08,
"logits": -1.2736433744430542,
"logps": -81.2705078125,
"loss": 0.0936,
"objective": 0.08593542128801346,
"ranking_idealized": 0.5791666507720947,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5791666507720947,
"regularize": 0.08593542128801346,
"step": 760
},
{
"dpo_loss": 0.508173406124115,
"epoch": 4.336324988190836,
"grad_norm": 99.70173708140004,
"learning_rate": 5.112590957844232e-08,
"logits": -1.2959843873977661,
"logps": -82.02350616455078,
"loss": 0.0954,
"objective": 0.09023122489452362,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5458333492279053,
"regularize": 0.09023122489452362,
"step": 765
},
{
"dpo_loss": 0.5115682482719421,
"epoch": 4.364666981577704,
"grad_norm": 94.66084425031231,
"learning_rate": 4.684610648167503e-08,
"logits": -1.1798807382583618,
"logps": -80.28400421142578,
"loss": 0.1006,
"objective": 0.10149048268795013,
"ranking_idealized": 0.5416666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5416666865348816,
"regularize": 0.10149047523736954,
"step": 770
},
{
"dpo_loss": 0.5190815329551697,
"epoch": 4.393008974964572,
"grad_norm": 97.786866794993,
"learning_rate": 4.274454982728032e-08,
"logits": -1.2468054294586182,
"logps": -81.20288848876953,
"loss": 0.0877,
"objective": 0.08333703130483627,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5249999761581421,
"regularize": 0.08333703130483627,
"step": 775
},
{
"dpo_loss": 0.5364963412284851,
"epoch": 4.42135096835144,
"grad_norm": 93.67850493598061,
"learning_rate": 3.882285294770937e-08,
"logits": -1.2755876779556274,
"logps": -80.01787567138672,
"loss": 0.0938,
"objective": 0.09289808571338654,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5,
"regularize": 0.09289808571338654,
"step": 780
},
{
"dpo_loss": 0.5412746667861938,
"epoch": 4.449692961738309,
"grad_norm": 94.9731716353141,
"learning_rate": 3.508255842822255e-08,
"logits": -1.292734980583191,
"logps": -80.0306625366211,
"loss": 0.0913,
"objective": 0.09275906533002853,
"ranking_idealized": 0.5458333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5416666865348816,
"regularize": 0.09275905787944794,
"step": 785
},
{
"dpo_loss": 0.5126563906669617,
"epoch": 4.478034955125177,
"grad_norm": 96.74045611069916,
"learning_rate": 3.15251375001192e-08,
"logits": -1.253103494644165,
"logps": -82.71080017089844,
"loss": 0.0897,
"objective": 0.08310778439044952,
"ranking_idealized": 0.5833333134651184,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.5791666507720947,
"regularize": 0.08310777693986893,
"step": 790
},
{
"dpo_loss": 0.5277770161628723,
"epoch": 4.506376948512045,
"grad_norm": 102.60968536429716,
"learning_rate": 2.8151989462033787e-08,
"logits": -1.1557574272155762,
"logps": -82.18769073486328,
"loss": 0.0898,
"objective": 0.08717220276594162,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5583333373069763,
"regularize": 0.08717220276594162,
"step": 795
},
{
"dpo_loss": 0.5270230770111084,
"epoch": 4.534718941898913,
"grad_norm": 96.30549999556874,
"learning_rate": 2.4964441129527335e-08,
"logits": -1.1533504724502563,
"logps": -81.5091323852539,
"loss": 0.0882,
"objective": 0.09708014130592346,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5333333611488342,
"regularize": 0.09708014130592346,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.7520135641098022,
"eval_logits": -1.2693291902542114,
"eval_logps": -87.7427749633789,
"eval_loss": 0.7119117975234985,
"eval_objective": 0.7144764065742493,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5113636255264282,
"eval_regularize": 0.7144764065742493,
"eval_runtime": 258.9953,
"eval_samples_per_second": 22.356,
"eval_steps_per_second": 0.934,
"step": 800
},
{
"dpo_loss": 0.5197975039482117,
"epoch": 4.563060935285781,
"grad_norm": 94.79722521361904,
"learning_rate": 2.1963746313188757e-08,
"logits": -1.226369023323059,
"logps": -80.86296844482422,
"loss": 0.0813,
"objective": 0.07861018925905228,
"ranking_idealized": 0.4833333194255829,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.48750001192092896,
"regularize": 0.07861018925905228,
"step": 805
},
{
"dpo_loss": 0.5268690586090088,
"epoch": 4.59140292867265,
"grad_norm": 95.24668751727883,
"learning_rate": 1.915108532545351e-08,
"logits": -1.290175199508667,
"logps": -80.24264526367188,
"loss": 0.0875,
"objective": 0.08437123894691467,
"ranking_idealized": 0.46666666865348816,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.46666666865348816,
"regularize": 0.08437123149633408,
"step": 810
},
{
"dpo_loss": 0.5314128398895264,
"epoch": 4.619744922059518,
"grad_norm": 94.93833650674884,
"learning_rate": 1.6527564516331638e-08,
"logits": -1.1623790264129639,
"logps": -81.81945037841797,
"loss": 0.0826,
"objective": 0.09123408049345016,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.6166666746139526,
"regularize": 0.09123408049345016,
"step": 815
},
{
"dpo_loss": 0.5448810458183289,
"epoch": 4.648086915446386,
"grad_norm": 99.02648077337858,
"learning_rate": 1.4094215838229172e-08,
"logits": -1.2590090036392212,
"logps": -81.42196655273438,
"loss": 0.0864,
"objective": 0.09086828678846359,
"ranking_idealized": 0.5625,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.5583333373069763,
"regularize": 0.09086828678846359,
"step": 820
},
{
"dpo_loss": 0.5343818664550781,
"epoch": 4.6764289088332545,
"grad_norm": 93.21086746148787,
"learning_rate": 1.1851996440033318e-08,
"logits": -1.1644405126571655,
"logps": -79.48759460449219,
"loss": 0.0796,
"objective": 0.07582148164510727,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5083333253860474,
"regularize": 0.07582148164510727,
"step": 825
},
{
"dpo_loss": 0.5317553281784058,
"epoch": 4.7047709022201225,
"grad_norm": 97.38269826526711,
"learning_rate": 9.801788290621505e-09,
"logits": -1.3180792331695557,
"logps": -81.53900909423828,
"loss": 0.0901,
"objective": 0.10412049293518066,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5375000238418579,
"regularize": 0.10412048548460007,
"step": 830
},
{
"dpo_loss": 0.5185703635215759,
"epoch": 4.733112895606991,
"grad_norm": 92.96268594737676,
"learning_rate": 7.944397831941951e-09,
"logits": -1.2221894264221191,
"logps": -81.21542358398438,
"loss": 0.0893,
"objective": 0.0936354324221611,
"ranking_idealized": 0.512499988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5083333253860474,
"regularize": 0.09363541752099991,
"step": 835
},
{
"dpo_loss": 0.5039588212966919,
"epoch": 4.7614548889938595,
"grad_norm": 93.6593830751853,
"learning_rate": 6.280555661802856e-09,
"logits": -1.2342692613601685,
"logps": -81.57914733886719,
"loss": 0.0827,
"objective": 0.09424074739217758,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.5583333373069763,
"regularize": 0.09424074739217758,
"step": 840
},
{
"dpo_loss": 0.5341091752052307,
"epoch": 4.7897968823807275,
"grad_norm": 94.7722019315539,
"learning_rate": 4.810916246494157e-09,
"logits": -1.2643479108810425,
"logps": -81.62488555908203,
"loss": 0.0815,
"objective": 0.07745879143476486,
"ranking_idealized": 0.4791666567325592,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.47083333134651184,
"regularize": 0.07745879143476486,
"step": 845
},
{
"dpo_loss": 0.5221010446548462,
"epoch": 4.818138875767596,
"grad_norm": 94.27228247906854,
"learning_rate": 3.5360576633558513e-09,
"logits": -1.2364792823791504,
"logps": -80.28359985351562,
"loss": 0.0796,
"objective": 0.0774383395910263,
"ranking_idealized": 0.5333333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5291666388511658,
"regularize": 0.07743828743696213,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.7514771223068237,
"eval_logits": -1.2688941955566406,
"eval_logps": -87.71552276611328,
"eval_loss": 0.7094753384590149,
"eval_objective": 0.711796224117279,
"eval_ranking_idealized": 0.5247933864593506,
"eval_ranking_idealized_expo": 0.5092975497245789,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 0.711796224117279,
"eval_runtime": 259.5779,
"eval_samples_per_second": 22.305,
"eval_steps_per_second": 0.932,
"step": 850
},
{
"dpo_loss": 0.5401983857154846,
"epoch": 4.846480869154464,
"grad_norm": 92.34602743429262,
"learning_rate": 2.4564813733932155e-09,
"logits": -1.205161690711975,
"logps": -80.62550354003906,
"loss": 0.08,
"objective": 0.08250019699335098,
"ranking_idealized": 0.5583333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5625,
"regularize": 0.08250019699335098,
"step": 855
},
{
"dpo_loss": 0.5175269246101379,
"epoch": 4.874822862541333,
"grad_norm": 99.70852124493375,
"learning_rate": 1.5726120240288631e-09,
"logits": -1.1921393871307373,
"logps": -80.51878356933594,
"loss": 0.0814,
"objective": 0.07799576222896576,
"ranking_idealized": 0.5666666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5666666626930237,
"regularize": 0.07799576222896576,
"step": 860
},
{
"dpo_loss": 0.5134026408195496,
"epoch": 4.903164855928201,
"grad_norm": 93.71216642807723,
"learning_rate": 8.847972820693051e-10,
"logits": -1.2603986263275146,
"logps": -79.67554473876953,
"loss": 0.0745,
"objective": 0.07266159355640411,
"ranking_idealized": 0.5083333253860474,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5166666507720947,
"regularize": 0.07266159355640411,
"step": 865
},
{
"dpo_loss": 0.5160818099975586,
"epoch": 4.931506849315069,
"grad_norm": 95.34876322242407,
"learning_rate": 3.933076969516724e-10,
"logits": -1.29552161693573,
"logps": -81.27151489257812,
"loss": 0.0794,
"objective": 0.08014672249555588,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5333333611488342,
"regularize": 0.08014672249555588,
"step": 870
},
{
"dpo_loss": 0.522140622138977,
"epoch": 4.959848842701937,
"grad_norm": 95.71079860788076,
"learning_rate": 9.833659432367803e-11,
"logits": -1.2183899879455566,
"logps": -81.16183471679688,
"loss": 0.081,
"objective": 0.08778008073568344,
"ranking_idealized": 0.574999988079071,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.574999988079071,
"regularize": 0.08778008073568344,
"step": 875
},
{
"dpo_loss": 0.5225833058357239,
"epoch": 4.988190836088805,
"grad_norm": 104.51115430406888,
"learning_rate": 0.0,
"logits": -1.2887017726898193,
"logps": -81.99433898925781,
"loss": 0.0809,
"objective": 0.0927945151925087,
"ranking_idealized": 0.5375000238418579,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.5375000238418579,
"regularize": 0.0927945151925087,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.3223599465055899,
"train_runtime": 35128.3246,
"train_samples_per_second": 7.231,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}