hZzy's picture
Model save
d2e1e35 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 1.3413641460666135,
"learning_rate": 5.681818181818182e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.9791666865348816,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6931489109992981,
"epoch": 0.02834199338686821,
"grad_norm": 1.344017584578235,
"learning_rate": 2.840909090909091e-07,
"logits": -1.3680044412612915,
"logps": -84.2520523071289,
"loss": 0.4131,
"objective": 0.3755461275577545,
"ranking_idealized": 0.921875,
"ranking_idealized_expo": 0.5729166865348816,
"ranking_simple": 0.546875,
"regularize": 0.3755461275577545,
"step": 5
},
{
"dpo_loss": 0.6928147077560425,
"epoch": 0.05668398677373642,
"grad_norm": 1.297307695578835,
"learning_rate": 5.681818181818182e-07,
"logits": -1.447161078453064,
"logps": -82.31820678710938,
"loss": 0.4176,
"objective": 0.4424538016319275,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5166666507720947,
"regularize": 0.4424538016319275,
"step": 10
},
{
"dpo_loss": 0.6928682923316956,
"epoch": 0.08502598016060463,
"grad_norm": 1.2486707608263468,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4277892112731934,
"logps": -81.3590316772461,
"loss": 0.4254,
"objective": 0.41196563839912415,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.512499988079071,
"regularize": 0.41196563839912415,
"step": 15
},
{
"dpo_loss": 0.6925787329673767,
"epoch": 0.11336797354747284,
"grad_norm": 1.4692119961571695,
"learning_rate": 1.1363636363636364e-06,
"logits": -1.4481867551803589,
"logps": -81.8401870727539,
"loss": 0.4151,
"objective": 0.4033361077308655,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.5291666388511658,
"regularize": 0.4033361077308655,
"step": 20
},
{
"dpo_loss": 0.6910920739173889,
"epoch": 0.14170996693434104,
"grad_norm": 1.6122424998527856,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.5582950115203857,
"logps": -82.6436538696289,
"loss": 0.4117,
"objective": 0.43133974075317383,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 0.43133974075317383,
"step": 25
},
{
"dpo_loss": 0.6899585723876953,
"epoch": 0.17005196032120926,
"grad_norm": 2.145868200166189,
"learning_rate": 1.7045454545454546e-06,
"logits": -1.6009422540664673,
"logps": -86.27643585205078,
"loss": 0.4119,
"objective": 0.4145013391971588,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.5666666626930237,
"regularize": 0.4145013391971588,
"step": 30
},
{
"dpo_loss": 0.6881429553031921,
"epoch": 0.19839395370807747,
"grad_norm": 3.250036749772235,
"learning_rate": 1.9886363636363638e-06,
"logits": -1.6226321458816528,
"logps": -95.08840942382812,
"loss": 0.408,
"objective": 0.3913627564907074,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.550000011920929,
"regularize": 0.3913627564907074,
"step": 35
},
{
"dpo_loss": 0.681670606136322,
"epoch": 0.22673594709494568,
"grad_norm": 2.7778046190059134,
"learning_rate": 2.2727272727272728e-06,
"logits": -1.6656767129898071,
"logps": -101.75907135009766,
"loss": 0.4132,
"objective": 0.42999422550201416,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6041666865348816,
"regularize": 0.42999422550201416,
"step": 40
},
{
"dpo_loss": 0.6795368790626526,
"epoch": 0.25507794048181387,
"grad_norm": 3.073343526840778,
"learning_rate": 2.556818181818182e-06,
"logits": -1.7650772333145142,
"logps": -107.9706039428711,
"loss": 0.4172,
"objective": 0.46339866518974304,
"ranking_idealized": 0.9666666388511658,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6083333492279053,
"regularize": 0.46339866518974304,
"step": 45
},
{
"dpo_loss": 0.6784433126449585,
"epoch": 0.2834199338686821,
"grad_norm": 4.230883583179675,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.6497570276260376,
"logps": -111.7117919921875,
"loss": 0.4052,
"objective": 0.3878687024116516,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.44999998807907104,
"ranking_simple": 0.5666666626930237,
"regularize": 0.3878687024116516,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.691393256187439,
"eval_logits": -1.8292194604873657,
"eval_logps": -129.08827209472656,
"eval_loss": 0.4106997549533844,
"eval_objective": 0.41201457381248474,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5371900796890259,
"eval_regularize": 0.41201457381248474,
"eval_runtime": 265.4611,
"eval_samples_per_second": 21.811,
"eval_steps_per_second": 0.912,
"step": 50
},
{
"dpo_loss": 0.6729306578636169,
"epoch": 0.3117619272555503,
"grad_norm": 5.170133309231958,
"learning_rate": 3.125e-06,
"logits": -1.7047711610794067,
"logps": -128.38836669921875,
"loss": 0.3945,
"objective": 0.39437106251716614,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6208333373069763,
"regularize": 0.39437106251716614,
"step": 55
},
{
"dpo_loss": 0.673882246017456,
"epoch": 0.3401039206424185,
"grad_norm": 4.832516943098698,
"learning_rate": 3.409090909090909e-06,
"logits": -1.6675713062286377,
"logps": -128.96734619140625,
"loss": 0.3855,
"objective": 0.3761754035949707,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6041666865348816,
"regularize": 0.3761754035949707,
"step": 60
},
{
"dpo_loss": 0.669740617275238,
"epoch": 0.3684459140292867,
"grad_norm": 5.367256043496177,
"learning_rate": 3.6931818181818186e-06,
"logits": -1.5951703786849976,
"logps": -137.0664520263672,
"loss": 0.3701,
"objective": 0.36182090640068054,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6000000238418579,
"regularize": 0.36182090640068054,
"step": 65
},
{
"dpo_loss": 0.6654062271118164,
"epoch": 0.39678790741615494,
"grad_norm": 4.94424534111878,
"learning_rate": 3.9772727272727275e-06,
"logits": -1.697352647781372,
"logps": -133.348388671875,
"loss": 0.3724,
"objective": 0.40955594182014465,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.637499988079071,
"regularize": 0.40955594182014465,
"step": 70
},
{
"dpo_loss": 0.6624744534492493,
"epoch": 0.42512990080302315,
"grad_norm": 5.966524918481801,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.8367187976837158,
"logps": -136.5087432861328,
"loss": 0.3757,
"objective": 0.38774457573890686,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.625,
"regularize": 0.38774457573890686,
"step": 75
},
{
"dpo_loss": 0.6609200835227966,
"epoch": 0.45347189418989137,
"grad_norm": 6.280774873594145,
"learning_rate": 4.5454545454545455e-06,
"logits": -1.9799270629882812,
"logps": -158.3598175048828,
"loss": 0.356,
"objective": 0.364311546087265,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6625000238418579,
"regularize": 0.364311546087265,
"step": 80
},
{
"dpo_loss": 0.6481165289878845,
"epoch": 0.4818138875767596,
"grad_norm": 6.596386501175196,
"learning_rate": 4.829545454545455e-06,
"logits": -2.1429412364959717,
"logps": -150.511474609375,
"loss": 0.3409,
"objective": 0.34856364130973816,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6625000238418579,
"regularize": 0.34856364130973816,
"step": 85
},
{
"dpo_loss": 0.6467424035072327,
"epoch": 0.5101558809636277,
"grad_norm": 7.381865971758164,
"learning_rate": 4.999921328558333e-06,
"logits": -2.0832204818725586,
"logps": -174.22291564941406,
"loss": 0.3472,
"objective": 0.3462918698787689,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6333333253860474,
"regularize": 0.3462918698787689,
"step": 90
},
{
"dpo_loss": 0.6253587007522583,
"epoch": 0.538497874350496,
"grad_norm": 8.119206476955762,
"learning_rate": 4.999036331701828e-06,
"logits": -2.2299115657806396,
"logps": -187.98475646972656,
"loss": 0.3267,
"objective": 0.33174222707748413,
"ranking_idealized": 0.9624999761581421,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.7291666865348816,
"regularize": 0.33174222707748413,
"step": 95
},
{
"dpo_loss": 0.6211538314819336,
"epoch": 0.5668398677373642,
"grad_norm": 6.956710000085001,
"learning_rate": 4.997168347957521e-06,
"logits": -2.3657114505767822,
"logps": -187.91744995117188,
"loss": 0.3407,
"objective": 0.3683268129825592,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.7083333134651184,
"regularize": 0.3683268129825592,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6838738322257996,
"eval_logits": -2.5065720081329346,
"eval_logps": -173.33192443847656,
"eval_loss": 0.40174734592437744,
"eval_objective": 0.40628084540367126,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5547520518302917,
"eval_regularize": 0.40628084540367126,
"eval_runtime": 259.1906,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 0.934,
"step": 100
},
{
"dpo_loss": 0.6136354804039001,
"epoch": 0.5951818611242324,
"grad_norm": 9.080339976174114,
"learning_rate": 4.994318112090048e-06,
"logits": -2.1985232830047607,
"logps": -186.09088134765625,
"loss": 0.3217,
"objective": 0.3403078019618988,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.737500011920929,
"regularize": 0.3403078019618988,
"step": 105
},
{
"dpo_loss": 0.6185809969902039,
"epoch": 0.6235238545111006,
"grad_norm": 7.791051933395558,
"learning_rate": 4.990486745229364e-06,
"logits": -2.484309434890747,
"logps": -186.34634399414062,
"loss": 0.3255,
"objective": 0.3505449593067169,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.699999988079071,
"regularize": 0.3505449593067169,
"step": 110
},
{
"dpo_loss": 0.6309658288955688,
"epoch": 0.6518658478979689,
"grad_norm": 6.754375219280332,
"learning_rate": 4.985675754429744e-06,
"logits": -2.47392201423645,
"logps": -166.2880859375,
"loss": 0.3035,
"objective": 0.2969822585582733,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6916666626930237,
"regularize": 0.2969822585582733,
"step": 115
},
{
"dpo_loss": 0.6232146620750427,
"epoch": 0.680207841284837,
"grad_norm": 8.649064764793055,
"learning_rate": 4.9798870320769884e-06,
"logits": -2.4262490272521973,
"logps": -179.76458740234375,
"loss": 0.2997,
"objective": 0.27002623677253723,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6916666626930237,
"regularize": 0.27002623677253723,
"step": 120
},
{
"dpo_loss": 0.6118648648262024,
"epoch": 0.7085498346717053,
"grad_norm": 8.68434746516712,
"learning_rate": 4.973122855144066e-06,
"logits": -2.3900842666625977,
"logps": -196.9748992919922,
"loss": 0.2995,
"objective": 0.2906176447868347,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.737500011920929,
"regularize": 0.2906176447868347,
"step": 125
},
{
"dpo_loss": 0.6124536991119385,
"epoch": 0.7368918280585735,
"grad_norm": 7.486248737537153,
"learning_rate": 4.965385884295467e-06,
"logits": -2.4602267742156982,
"logps": -182.41766357421875,
"loss": 0.2873,
"objective": 0.26530107855796814,
"ranking_idealized": 0.9166666865348816,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.7250000238418579,
"regularize": 0.26530107855796814,
"step": 130
},
{
"dpo_loss": 0.606670081615448,
"epoch": 0.7652338214454416,
"grad_norm": 7.285272647997681,
"learning_rate": 4.956679162840646e-06,
"logits": -2.281942844390869,
"logps": -177.3143768310547,
"loss": 0.269,
"objective": 0.270210325717926,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.7208333611488342,
"regularize": 0.270210325717926,
"step": 135
},
{
"dpo_loss": 0.6003846526145935,
"epoch": 0.7935758148323099,
"grad_norm": 7.413948788955954,
"learning_rate": 4.947006115536947e-06,
"logits": -2.1732773780822754,
"logps": -176.40997314453125,
"loss": 0.2664,
"objective": 0.24727170169353485,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.7666666507720947,
"regularize": 0.24727170169353485,
"step": 140
},
{
"dpo_loss": 0.6026275157928467,
"epoch": 0.821917808219178,
"grad_norm": 7.450961549840002,
"learning_rate": 4.9363705472424825e-06,
"logits": -2.2946832180023193,
"logps": -178.30978393554688,
"loss": 0.2707,
"objective": 0.2942873537540436,
"ranking_idealized": 0.9125000238418579,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.7250000238418579,
"regularize": 0.2942873537540436,
"step": 145
},
{
"dpo_loss": 0.6038042306900024,
"epoch": 0.8502598016060463,
"grad_norm": 8.337666421628235,
"learning_rate": 4.924776641419513e-06,
"logits": -2.2924880981445312,
"logps": -175.90670776367188,
"loss": 0.2596,
"objective": 0.2636435329914093,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.7541666626930237,
"regularize": 0.2636435329914093,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6806454658508301,
"eval_logits": -2.4464104175567627,
"eval_logps": -188.63946533203125,
"eval_loss": 0.4017498791217804,
"eval_objective": 0.40516260266304016,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5423553586006165,
"eval_regularize": 0.40516260266304016,
"eval_runtime": 259.5657,
"eval_samples_per_second": 22.306,
"eval_steps_per_second": 0.932,
"step": 150
},
{
"dpo_loss": 0.5875340700149536,
"epoch": 0.8786017949929145,
"grad_norm": 8.964143655715564,
"learning_rate": 4.9122289584888926e-06,
"logits": -2.3187806606292725,
"logps": -185.90478515625,
"loss": 0.2651,
"objective": 0.26818570494651794,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.800000011920929,
"regularize": 0.26818570494651794,
"step": 155
},
{
"dpo_loss": 0.5894069671630859,
"epoch": 0.9069437883797827,
"grad_norm": 7.833211918555924,
"learning_rate": 4.8987324340362445e-06,
"logits": -2.2485156059265137,
"logps": -194.0889129638672,
"loss": 0.2472,
"objective": 0.24632495641708374,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.7541666626930237,
"regularize": 0.24632495641708374,
"step": 160
},
{
"dpo_loss": 0.5998678803443909,
"epoch": 0.9352857817666509,
"grad_norm": 6.957604774053052,
"learning_rate": 4.884292376870567e-06,
"logits": -2.367635488510132,
"logps": -169.78195190429688,
"loss": 0.2564,
"objective": 0.26594653725624084,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.7208333611488342,
"regularize": 0.26594653725624084,
"step": 165
},
{
"dpo_loss": 0.5835825204849243,
"epoch": 0.9636277751535192,
"grad_norm": 8.930600570817928,
"learning_rate": 4.868914466936038e-06,
"logits": -2.363553047180176,
"logps": -192.9253387451172,
"loss": 0.2434,
"objective": 0.22550734877586365,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.7708333134651184,
"regularize": 0.22550733387470245,
"step": 170
},
{
"dpo_loss": 0.5910046100616455,
"epoch": 0.9919697685403873,
"grad_norm": 7.732085709182539,
"learning_rate": 4.8526047530778175e-06,
"logits": -2.2910239696502686,
"logps": -191.64141845703125,
"loss": 0.2496,
"objective": 0.24141448736190796,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.6000000238418579,
"ranking_simple": 0.7875000238418579,
"regularize": 0.24141448736190796,
"step": 175
},
{
"dpo_loss": 0.5720356106758118,
"epoch": 1.0203117619272555,
"grad_norm": 7.878585873914542,
"learning_rate": 4.835369650662767e-06,
"logits": -2.5219788551330566,
"logps": -186.59535217285156,
"loss": 0.2212,
"objective": 0.21063460409641266,
"ranking_idealized": 0.9125000238418579,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.8125,
"regularize": 0.21063460409641266,
"step": 180
},
{
"dpo_loss": 0.5561386942863464,
"epoch": 1.0486537553141237,
"grad_norm": 8.429656140437402,
"learning_rate": 4.817215939055984e-06,
"logits": -2.44401478767395,
"logps": -204.15762329101562,
"loss": 0.2153,
"objective": 0.21741175651550293,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.8458333611488342,
"regularize": 0.21741175651550293,
"step": 185
},
{
"dpo_loss": 0.5703259706497192,
"epoch": 1.076995748700992,
"grad_norm": 6.743069420703677,
"learning_rate": 4.798150758954164e-06,
"logits": -2.489015817642212,
"logps": -198.45516967773438,
"loss": 0.2003,
"objective": 0.17540977895259857,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.8333333134651184,
"regularize": 0.17540977895259857,
"step": 190
},
{
"dpo_loss": 0.5622718930244446,
"epoch": 1.10533774208786,
"grad_norm": 7.079228513521207,
"learning_rate": 4.778181609576832e-06,
"logits": -2.429560661315918,
"logps": -182.86814880371094,
"loss": 0.1994,
"objective": 0.16888141632080078,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.875,
"regularize": 0.16888141632080078,
"step": 195
},
{
"dpo_loss": 0.5696191787719727,
"epoch": 1.1336797354747283,
"grad_norm": 6.830259665006017,
"learning_rate": 4.757316345716554e-06,
"logits": -2.513395071029663,
"logps": -192.72938537597656,
"loss": 0.1965,
"objective": 0.20290271937847137,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.8416666388511658,
"regularize": 0.20290271937847137,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6801125407218933,
"eval_logits": -2.597655773162842,
"eval_logps": -193.1246795654297,
"eval_loss": 0.40018174052238464,
"eval_objective": 0.40411826968193054,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.55888432264328,
"eval_regularize": 0.40411826968193054,
"eval_runtime": 259.3248,
"eval_samples_per_second": 22.327,
"eval_steps_per_second": 0.933,
"step": 200
},
{
"dpo_loss": 0.5719407200813293,
"epoch": 1.1620217288615966,
"grad_norm": 6.896008583963979,
"learning_rate": 4.735563174649278e-06,
"logits": -2.5157065391540527,
"logps": -198.75962829589844,
"loss": 0.205,
"objective": 0.2138771265745163,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8166666626930237,
"regularize": 0.2138771265745163,
"step": 205
},
{
"dpo_loss": 0.5655397176742554,
"epoch": 1.1903637222484649,
"grad_norm": 6.830065558874749,
"learning_rate": 4.7129306529060415e-06,
"logits": -2.547936201095581,
"logps": -187.2952117919922,
"loss": 0.2025,
"objective": 0.22636540234088898,
"ranking_idealized": 0.9041666388511658,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8166666626930237,
"regularize": 0.2263653725385666,
"step": 210
},
{
"dpo_loss": 0.5676775574684143,
"epoch": 1.2187057156353331,
"grad_norm": 6.8520964757916945,
"learning_rate": 4.68942768290728e-06,
"logits": -2.54328989982605,
"logps": -187.34585571289062,
"loss": 0.1863,
"objective": 0.19478672742843628,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.8416666388511658,
"regularize": 0.19478671252727509,
"step": 215
},
{
"dpo_loss": 0.5695532560348511,
"epoch": 1.2470477090222012,
"grad_norm": 6.476230536915824,
"learning_rate": 4.665063509461098e-06,
"logits": -2.455770969390869,
"logps": -187.62728881835938,
"loss": 0.1957,
"objective": 0.1894843727350235,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.6083333492279053,
"ranking_simple": 0.8583333492279053,
"regularize": 0.1894843727350235,
"step": 220
},
{
"dpo_loss": 0.5596610903739929,
"epoch": 1.2753897024090695,
"grad_norm": 6.702837995316673,
"learning_rate": 4.639847716126855e-06,
"logits": -2.4951536655426025,
"logps": -189.64401245117188,
"loss": 0.1959,
"objective": 0.1725076138973236,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.8583333492279053,
"regularize": 0.1725076138973236,
"step": 225
},
{
"dpo_loss": 0.5657731890678406,
"epoch": 1.3037316957959377,
"grad_norm": 7.276570061796103,
"learning_rate": 4.613790221445511e-06,
"logits": -2.5368714332580566,
"logps": -193.73602294921875,
"loss": 0.1875,
"objective": 0.18021216988563538,
"ranking_idealized": 0.9125000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.8041666746139526,
"regularize": 0.18021215498447418,
"step": 230
},
{
"dpo_loss": 0.5439472794532776,
"epoch": 1.3320736891828058,
"grad_norm": 7.643293399256914,
"learning_rate": 4.586901275038201e-06,
"logits": -2.8654701709747314,
"logps": -191.69354248046875,
"loss": 0.1942,
"objective": 0.2024109810590744,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.8416666388511658,
"regularize": 0.2024109810590744,
"step": 235
},
{
"dpo_loss": 0.5640192031860352,
"epoch": 1.360415682569674,
"grad_norm": 7.002186203050705,
"learning_rate": 4.559191453574582e-06,
"logits": -2.7892987728118896,
"logps": -191.39663696289062,
"loss": 0.1853,
"objective": 0.19372233748435974,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.7958333492279053,
"regularize": 0.19372233748435974,
"step": 240
},
{
"dpo_loss": 0.5699235796928406,
"epoch": 1.3887576759565423,
"grad_norm": 7.135454177759647,
"learning_rate": 4.530671656612544e-06,
"logits": -2.747896909713745,
"logps": -188.15423583984375,
"loss": 0.172,
"objective": 0.18116973340511322,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.8291666507720947,
"regularize": 0.18116970360279083,
"step": 245
},
{
"dpo_loss": 0.5546202063560486,
"epoch": 1.4170996693434104,
"grad_norm": 6.856611462056187,
"learning_rate": 4.501353102310901e-06,
"logits": -2.626624822616577,
"logps": -189.72596740722656,
"loss": 0.1784,
"objective": 0.19344764947891235,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.8541666865348816,
"regularize": 0.19344764947891235,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6801539659500122,
"eval_logits": -2.7528111934661865,
"eval_logps": -189.47007751464844,
"eval_loss": 0.39902833104133606,
"eval_objective": 0.40230515599250793,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5619834661483765,
"eval_regularize": 0.40230515599250793,
"eval_runtime": 258.6625,
"eval_samples_per_second": 22.384,
"eval_steps_per_second": 0.936,
"step": 250
},
{
"dpo_loss": 0.5707473754882812,
"epoch": 1.4454416627302786,
"grad_norm": 6.789096244940944,
"learning_rate": 4.4712473230167775e-06,
"logits": -2.524132490158081,
"logps": -189.31150817871094,
"loss": 0.1841,
"objective": 0.17286911606788635,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.8458333611488342,
"regularize": 0.17286911606788635,
"step": 255
},
{
"dpo_loss": 0.5669309496879578,
"epoch": 1.473783656117147,
"grad_norm": 7.262023839822884,
"learning_rate": 4.440366160729393e-06,
"logits": -2.642547130584717,
"logps": -196.33497619628906,
"loss": 0.1778,
"objective": 0.19296441972255707,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.8541666865348816,
"regularize": 0.19296441972255707,
"step": 260
},
{
"dpo_loss": 0.546380877494812,
"epoch": 1.5021256495040152,
"grad_norm": 6.836875807788374,
"learning_rate": 4.4087217624420595e-06,
"logits": -2.617671251296997,
"logps": -200.48138427734375,
"loss": 0.18,
"objective": 0.1762746423482895,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8541666865348816,
"regularize": 0.1762746423482895,
"step": 265
},
{
"dpo_loss": 0.5641717910766602,
"epoch": 1.5304676428908834,
"grad_norm": 6.457668960267231,
"learning_rate": 4.376326575364206e-06,
"logits": -2.5867457389831543,
"logps": -194.27902221679688,
"loss": 0.1782,
"objective": 0.19895337522029877,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.8500000238418579,
"regularize": 0.19895337522029877,
"step": 270
},
{
"dpo_loss": 0.5631863474845886,
"epoch": 1.5588096362777515,
"grad_norm": 6.814477250627082,
"learning_rate": 4.34319334202531e-06,
"logits": -2.5872161388397217,
"logps": -194.5428924560547,
"loss": 0.1719,
"objective": 0.16667112708091736,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.824999988079071,
"regularize": 0.16667112708091736,
"step": 275
},
{
"dpo_loss": 0.5430881977081299,
"epoch": 1.5871516296646198,
"grad_norm": 7.393370645908027,
"learning_rate": 4.309335095262675e-06,
"logits": -2.4844515323638916,
"logps": -203.0095977783203,
"loss": 0.1821,
"objective": 0.18616026639938354,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.862500011920929,
"regularize": 0.18616026639938354,
"step": 280
},
{
"dpo_loss": 0.5480186939239502,
"epoch": 1.615493623051488,
"grad_norm": 6.140317838250891,
"learning_rate": 4.274765153095008e-06,
"logits": -2.618225574493408,
"logps": -192.17019653320312,
"loss": 0.1677,
"objective": 0.16235129535198212,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.8541666865348816,
"regularize": 0.16235129535198212,
"step": 285
},
{
"dpo_loss": 0.5588306188583374,
"epoch": 1.643835616438356,
"grad_norm": 6.415461406420722,
"learning_rate": 4.239497113483819e-06,
"logits": -2.691134214401245,
"logps": -191.76356506347656,
"loss": 0.1678,
"objective": 0.15764465928077698,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.8333333134651184,
"regularize": 0.15764465928077698,
"step": 290
},
{
"dpo_loss": 0.5371195673942566,
"epoch": 1.6721776098252243,
"grad_norm": 6.197778580003095,
"learning_rate": 4.203544848984729e-06,
"logits": -2.665118455886841,
"logps": -199.97247314453125,
"loss": 0.1669,
"objective": 0.15341004729270935,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8958333134651184,
"regularize": 0.15341004729270935,
"step": 295
},
{
"dpo_loss": 0.5480075478553772,
"epoch": 1.7005196032120926,
"grad_norm": 6.138490802083033,
"learning_rate": 4.16692250129073e-06,
"logits": -2.7728724479675293,
"logps": -199.02529907226562,
"loss": 0.1717,
"objective": 0.20036275684833527,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.875,
"regularize": 0.20036275684833527,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6798518300056458,
"eval_logits": -2.877673625946045,
"eval_logps": -195.73040771484375,
"eval_loss": 0.40205851197242737,
"eval_objective": 0.40424150228500366,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5454545617103577,
"eval_regularize": 0.40424150228500366,
"eval_runtime": 259.7378,
"eval_samples_per_second": 22.292,
"eval_steps_per_second": 0.932,
"step": 300
},
{
"dpo_loss": 0.5607944130897522,
"epoch": 1.7288615965989607,
"grad_norm": 6.327660769011926,
"learning_rate": 4.129644475669617e-06,
"logits": -2.741549253463745,
"logps": -191.5762481689453,
"loss": 0.1638,
"objective": 0.17037154734134674,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.8333333134651184,
"regularize": 0.17037154734134674,
"step": 305
},
{
"dpo_loss": 0.5504526495933533,
"epoch": 1.7572035899858292,
"grad_norm": 6.7403750373805575,
"learning_rate": 4.091725435297721e-06,
"logits": -2.7614734172821045,
"logps": -190.3129425048828,
"loss": 0.1573,
"objective": 0.1668892502784729,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.8500000238418579,
"regularize": 0.1668892502784729,
"step": 310
},
{
"dpo_loss": 0.5544535517692566,
"epoch": 1.7855455833726972,
"grad_norm": 6.316947546186969,
"learning_rate": 4.053180295492203e-06,
"logits": -2.530224084854126,
"logps": -189.7359161376953,
"loss": 0.1665,
"objective": 0.15285438299179077,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8500000238418579,
"regularize": 0.15285435318946838,
"step": 315
},
{
"dpo_loss": 0.5565517544746399,
"epoch": 1.8138875767595655,
"grad_norm": 6.951800105794237,
"learning_rate": 4.014024217844167e-06,
"logits": -2.596423864364624,
"logps": -201.33631896972656,
"loss": 0.1609,
"objective": 0.1613713800907135,
"ranking_idealized": 0.9041666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.8166666626930237,
"regularize": 0.1613713800907135,
"step": 320
},
{
"dpo_loss": 0.5427613854408264,
"epoch": 1.8422295701464337,
"grad_norm": 7.385375003834793,
"learning_rate": 3.974272604254906e-06,
"logits": -2.746447801589966,
"logps": -198.5028839111328,
"loss": 0.1637,
"objective": 0.15741844475269318,
"ranking_idealized": 0.9583333134651184,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.8833333253860474,
"regularize": 0.15741844475269318,
"step": 325
},
{
"dpo_loss": 0.5491302609443665,
"epoch": 1.8705715635333018,
"grad_norm": 7.03671844997743,
"learning_rate": 3.933941090877615e-06,
"logits": -2.5696513652801514,
"logps": -197.8240203857422,
"loss": 0.1572,
"objective": 0.15931017696857452,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.8374999761581421,
"regularize": 0.15931017696857452,
"step": 330
},
{
"dpo_loss": 0.5589691400527954,
"epoch": 1.89891355692017,
"grad_norm": 6.477813384915639,
"learning_rate": 3.893045541966975e-06,
"logits": -2.762031316757202,
"logps": -203.58236694335938,
"loss": 0.1535,
"objective": 0.15087805688381195,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.8708333373069763,
"regularize": 0.15087805688381195,
"step": 335
},
{
"dpo_loss": 0.5663090944290161,
"epoch": 1.9272555503070383,
"grad_norm": 6.018506322545624,
"learning_rate": 3.8516020436389945e-06,
"logits": -2.7401764392852783,
"logps": -201.431884765625,
"loss": 0.1465,
"objective": 0.14212678372859955,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.8458333611488342,
"regularize": 0.14212678372859955,
"step": 340
},
{
"dpo_loss": 0.5585800409317017,
"epoch": 1.9555975436939064,
"grad_norm": 6.624036944677984,
"learning_rate": 3.8096268975436045e-06,
"logits": -2.8644747734069824,
"logps": -205.10971069335938,
"loss": 0.1496,
"objective": 0.15010811388492584,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.8500000238418579,
"regularize": 0.15010811388492584,
"step": 345
},
{
"dpo_loss": 0.5542294383049011,
"epoch": 1.9839395370807746,
"grad_norm": 6.797447561538416,
"learning_rate": 3.767136614452458e-06,
"logits": -2.858165740966797,
"logps": -216.2846221923828,
"loss": 0.1527,
"objective": 0.1548275649547577,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.862500011920929,
"regularize": 0.1548275649547577,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6759648323059082,
"eval_logits": -3.1101362705230713,
"eval_logps": -211.60682678222656,
"eval_loss": 0.39596185088157654,
"eval_objective": 0.39695027470588684,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5557851195335388,
"eval_regularize": 0.39695027470588684,
"eval_runtime": 259.1655,
"eval_samples_per_second": 22.341,
"eval_steps_per_second": 0.934,
"step": 350
},
{
"dpo_loss": 0.5424126386642456,
"epoch": 2.012281530467643,
"grad_norm": 5.8063883271711685,
"learning_rate": 3.724147907764478e-06,
"logits": -2.7706944942474365,
"logps": -205.6593780517578,
"loss": 0.1484,
"objective": 0.1335248053073883,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8958333134651184,
"regularize": 0.1335248053073883,
"step": 355
},
{
"dpo_loss": 0.5424516797065735,
"epoch": 2.040623523854511,
"grad_norm": 6.095590594476145,
"learning_rate": 3.6806776869317074e-06,
"logits": -2.8919057846069336,
"logps": -198.0377655029297,
"loss": 0.1296,
"objective": 0.13360460102558136,
"ranking_idealized": 0.9750000238418579,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.8958333134651184,
"regularize": 0.13360460102558136,
"step": 360
},
{
"dpo_loss": 0.5373047590255737,
"epoch": 2.0689655172413794,
"grad_norm": 6.231030884897393,
"learning_rate": 3.6367430508080283e-06,
"logits": -3.063735008239746,
"logps": -207.88970947265625,
"loss": 0.1326,
"objective": 0.12027280777692795,
"ranking_idealized": 0.9083333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8458333611488342,
"regularize": 0.12027280777692795,
"step": 365
},
{
"dpo_loss": 0.5498053431510925,
"epoch": 2.0973075106282475,
"grad_norm": 6.774825067357434,
"learning_rate": 3.5923612809233987e-06,
"logits": -3.0324106216430664,
"logps": -194.41429138183594,
"loss": 0.1275,
"objective": 0.12781473994255066,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.8541666865348816,
"regularize": 0.12781472504138947,
"step": 370
},
{
"dpo_loss": 0.5362412333488464,
"epoch": 2.1256495040151155,
"grad_norm": 5.992693343450592,
"learning_rate": 3.547549834686222e-06,
"logits": -3.0772500038146973,
"logps": -205.6930694580078,
"loss": 0.1251,
"objective": 0.12155468761920929,
"ranking_idealized": 0.9750000238418579,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.9125000238418579,
"regularize": 0.12155468761920929,
"step": 375
},
{
"dpo_loss": 0.5477665066719055,
"epoch": 2.153991497401984,
"grad_norm": 5.66117672582953,
"learning_rate": 3.5023263385165346e-06,
"logits": -2.971487522125244,
"logps": -195.91168212890625,
"loss": 0.1269,
"objective": 0.11776351928710938,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.8833333253860474,
"regularize": 0.11776351928710938,
"step": 380
},
{
"dpo_loss": 0.5528424382209778,
"epoch": 2.182333490788852,
"grad_norm": 5.673453112921881,
"learning_rate": 3.4567085809127247e-06,
"logits": -3.0804078578948975,
"logps": -187.17169189453125,
"loss": 0.1248,
"objective": 0.1074480265378952,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.9166666865348816,
"regularize": 0.1074480265378952,
"step": 385
},
{
"dpo_loss": 0.5393837094306946,
"epoch": 2.21067548417572,
"grad_norm": 6.024104465892304,
"learning_rate": 3.410714505454486e-06,
"logits": -2.998112916946411,
"logps": -206.6367645263672,
"loss": 0.1224,
"objective": 0.11136513203382492,
"ranking_idealized": 0.9083333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.8666666746139526,
"regularize": 0.11136512458324432,
"step": 390
},
{
"dpo_loss": 0.5289559364318848,
"epoch": 2.2390174775625886,
"grad_norm": 6.100671720050322,
"learning_rate": 3.364362203744777e-06,
"logits": -3.014930009841919,
"logps": -199.62350463867188,
"loss": 0.1358,
"objective": 0.1332855224609375,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.8999999761581421,
"regularize": 0.1332855224609375,
"step": 395
},
{
"dpo_loss": 0.5282385945320129,
"epoch": 2.2673594709494567,
"grad_norm": 5.884120825175806,
"learning_rate": 3.3176699082935546e-06,
"logits": -3.104701042175293,
"logps": -204.1650390625,
"loss": 0.1267,
"objective": 0.13824278116226196,
"ranking_idealized": 0.9666666388511658,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.9083333611488342,
"regularize": 0.13824278116226196,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6775676608085632,
"eval_logits": -3.2515387535095215,
"eval_logps": -201.03680419921875,
"eval_loss": 0.39813509583473206,
"eval_objective": 0.39980101585388184,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5619834661483765,
"eval_regularize": 0.39980101585388184,
"eval_runtime": 259.6475,
"eval_samples_per_second": 22.299,
"eval_steps_per_second": 0.932,
"step": 400
},
{
"dpo_loss": 0.5318711400032043,
"epoch": 2.295701464336325,
"grad_norm": 5.891428026423688,
"learning_rate": 3.2706559853460818e-06,
"logits": -3.1382436752319336,
"logps": -204.19851684570312,
"loss": 0.1245,
"objective": 0.12317010760307312,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.8999999761581421,
"regularize": 0.12317009270191193,
"step": 405
},
{
"dpo_loss": 0.5330458879470825,
"epoch": 2.324043457723193,
"grad_norm": 5.883839309354464,
"learning_rate": 3.2233389276586325e-06,
"logits": -2.8399434089660645,
"logps": -203.78355407714844,
"loss": 0.1172,
"objective": 0.11361113935709,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.862500011920929,
"regularize": 0.1136111319065094,
"step": 410
},
{
"dpo_loss": 0.5357276797294617,
"epoch": 2.3523854511100613,
"grad_norm": 5.777709064523667,
"learning_rate": 3.1757373472244324e-06,
"logits": -2.7951467037200928,
"logps": -203.05201721191406,
"loss": 0.1182,
"objective": 0.12953059375286102,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.9041666388511658,
"regularize": 0.12953059375286102,
"step": 415
},
{
"dpo_loss": 0.5517702698707581,
"epoch": 2.3807274444969297,
"grad_norm": 5.823648414077854,
"learning_rate": 3.127869967952698e-06,
"logits": -2.653197765350342,
"logps": -199.99070739746094,
"loss": 0.124,
"objective": 0.12403346598148346,
"ranking_idealized": 0.9041666388511658,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.8333333134651184,
"regularize": 0.12403346598148346,
"step": 420
},
{
"dpo_loss": 0.5411447286605835,
"epoch": 2.409069437883798,
"grad_norm": 5.406034966497648,
"learning_rate": 3.0797556183036582e-06,
"logits": -2.7264721393585205,
"logps": -199.3270721435547,
"loss": 0.1206,
"objective": 0.11311660706996918,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.887499988079071,
"regularize": 0.11311660706996918,
"step": 425
},
{
"dpo_loss": 0.5396248698234558,
"epoch": 2.4374114312706663,
"grad_norm": 5.630938578254106,
"learning_rate": 3.0314132238824416e-06,
"logits": -2.8288919925689697,
"logps": -200.36387634277344,
"loss": 0.1145,
"objective": 0.10814479738473892,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.8916666507720947,
"regularize": 0.10814479738473892,
"step": 430
},
{
"dpo_loss": 0.537260890007019,
"epoch": 2.4657534246575343,
"grad_norm": 5.382852925810034,
"learning_rate": 2.9828617999947647e-06,
"logits": -2.9378559589385986,
"logps": -207.9824676513672,
"loss": 0.1177,
"objective": 0.1222720518708229,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.8833333253860474,
"regularize": 0.1222720518708229,
"step": 435
},
{
"dpo_loss": 0.5335346460342407,
"epoch": 2.4940954180444024,
"grad_norm": 5.4087072313434,
"learning_rate": 2.9341204441673267e-06,
"logits": -2.8663976192474365,
"logps": -201.47125244140625,
"loss": 0.1171,
"objective": 0.11767010390758514,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.887499988079071,
"regularize": 0.11767010390758514,
"step": 440
},
{
"dpo_loss": 0.5356226563453674,
"epoch": 2.5224374114312704,
"grad_norm": 5.709598259442252,
"learning_rate": 2.8852083286358647e-06,
"logits": -2.838826894760132,
"logps": -196.2780303955078,
"loss": 0.1126,
"objective": 0.10499007254838943,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.9041666388511658,
"regularize": 0.10499005764722824,
"step": 445
},
{
"dpo_loss": 0.5633688569068909,
"epoch": 2.550779404818139,
"grad_norm": 5.3793816763659255,
"learning_rate": 2.8361446928038298e-06,
"logits": -2.838387966156006,
"logps": -201.06651306152344,
"loss": 0.1121,
"objective": 0.11985477060079575,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8458333611488342,
"regularize": 0.11985477060079575,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6782127022743225,
"eval_logits": -2.9522619247436523,
"eval_logps": -192.7809295654297,
"eval_loss": 0.39569520950317383,
"eval_objective": 0.3975852131843567,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5619834661483765,
"eval_regularize": 0.3975852131843567,
"eval_runtime": 258.9142,
"eval_samples_per_second": 22.363,
"eval_steps_per_second": 0.935,
"step": 450
},
{
"dpo_loss": 0.5279621481895447,
"epoch": 2.579121398205007,
"grad_norm": 5.392681143501708,
"learning_rate": 2.7869488356746344e-06,
"logits": -2.902580499649048,
"logps": -197.00804138183594,
"loss": 0.1147,
"objective": 0.11110316216945648,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.9041666388511658,
"regularize": 0.11110316216945648,
"step": 455
},
{
"dpo_loss": 0.5277626514434814,
"epoch": 2.6074633915918755,
"grad_norm": 5.315354427263376,
"learning_rate": 2.7376401082604563e-06,
"logits": -3.0961711406707764,
"logps": -201.3343048095703,
"loss": 0.1143,
"objective": 0.11841437220573425,
"ranking_idealized": 0.9624999761581421,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.9291666746139526,
"regularize": 0.11841436475515366,
"step": 460
},
{
"dpo_loss": 0.5351486802101135,
"epoch": 2.6358053849787435,
"grad_norm": 5.411425844401666,
"learning_rate": 2.6882379059705953e-06,
"logits": -3.0071170330047607,
"logps": -197.46665954589844,
"loss": 0.1123,
"objective": 0.11872568726539612,
"ranking_idealized": 0.9166666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8833333253860474,
"regularize": 0.11872567981481552,
"step": 465
},
{
"dpo_loss": 0.5352925658226013,
"epoch": 2.6641473783656116,
"grad_norm": 5.65268285367403,
"learning_rate": 2.6387616609823506e-06,
"logits": -3.0599467754364014,
"logps": -206.38381958007812,
"loss": 0.1029,
"objective": 0.103180892765522,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.9166666865348816,
"regularize": 0.103180892765522,
"step": 470
},
{
"dpo_loss": 0.5457909107208252,
"epoch": 2.69248937175248,
"grad_norm": 5.4108717659373395,
"learning_rate": 2.5892308345974517e-06,
"logits": -2.9345638751983643,
"logps": -191.0044403076172,
"loss": 0.1057,
"objective": 0.1046978086233139,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.887499988079071,
"regularize": 0.1046978086233139,
"step": 475
},
{
"dpo_loss": 0.5475970506668091,
"epoch": 2.720831365139348,
"grad_norm": 5.319894863881298,
"learning_rate": 2.53966490958702e-06,
"logits": -3.068021059036255,
"logps": -189.0258026123047,
"loss": 0.1101,
"objective": 0.10782204568386078,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.8916666507720947,
"regularize": 0.10782204568386078,
"step": 480
},
{
"dpo_loss": 0.537803590297699,
"epoch": 2.7491733585262166,
"grad_norm": 5.436329599180351,
"learning_rate": 2.490083382528097e-06,
"logits": -3.1013996601104736,
"logps": -200.41253662109375,
"loss": 0.1114,
"objective": 0.11053992807865143,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.887499988079071,
"regularize": 0.11053992807865143,
"step": 485
},
{
"dpo_loss": 0.5218387842178345,
"epoch": 2.7775153519130846,
"grad_norm": 5.475491663367257,
"learning_rate": 2.440505756134732e-06,
"logits": -3.1719369888305664,
"logps": -201.2511444091797,
"loss": 0.1091,
"objective": 0.1095014289021492,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.9041666388511658,
"regularize": 0.1095014289021492,
"step": 490
},
{
"dpo_loss": 0.5326829552650452,
"epoch": 2.8058573452999527,
"grad_norm": 5.099192097500262,
"learning_rate": 2.3909515315866606e-06,
"logits": -3.1574199199676514,
"logps": -198.6073760986328,
"loss": 0.1017,
"objective": 0.09904598444700241,
"ranking_idealized": 0.9166666865348816,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.887499988079071,
"regularize": 0.09904597699642181,
"step": 495
},
{
"dpo_loss": 0.5235782861709595,
"epoch": 2.8341993386868207,
"grad_norm": 5.2339920064616186,
"learning_rate": 2.341440200858589e-06,
"logits": -3.177107810974121,
"logps": -198.19998168945312,
"loss": 0.1063,
"objective": 0.10962475836277008,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.9125000238418579,
"regularize": 0.10962474346160889,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6759718656539917,
"eval_logits": -3.2835006713867188,
"eval_logps": -195.79200744628906,
"eval_loss": 0.39410659670829773,
"eval_objective": 0.3949359357357025,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.567148745059967,
"eval_regularize": 0.3949359357357025,
"eval_runtime": 259.0636,
"eval_samples_per_second": 22.35,
"eval_steps_per_second": 0.934,
"step": 500
},
{
"dpo_loss": 0.5304385423660278,
"epoch": 2.862541332073689,
"grad_norm": 5.373601466021835,
"learning_rate": 2.2919912390530945e-06,
"logits": -3.0923917293548584,
"logps": -199.0631866455078,
"loss": 0.102,
"objective": 0.10511735081672668,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.9083333611488342,
"regularize": 0.10511735081672668,
"step": 505
},
{
"dpo_loss": 0.5375287532806396,
"epoch": 2.8908833254605573,
"grad_norm": 6.218818504345056,
"learning_rate": 2.242624096740164e-06,
"logits": -3.0648419857025146,
"logps": -198.03871154785156,
"loss": 0.1009,
"objective": 0.09942923486232758,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.8916666507720947,
"regularize": 0.09942923486232758,
"step": 510
},
{
"dpo_loss": 0.5454570651054382,
"epoch": 2.9192253188474258,
"grad_norm": 5.360667892296426,
"learning_rate": 2.193358192306384e-06,
"logits": -3.177243709564209,
"logps": -192.1931610107422,
"loss": 0.1015,
"objective": 0.1067223846912384,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.887499988079071,
"regularize": 0.1067223846912384,
"step": 515
},
{
"dpo_loss": 0.5447422862052917,
"epoch": 2.947567312234294,
"grad_norm": 5.250252460609212,
"learning_rate": 2.1442129043167877e-06,
"logits": -2.988645076751709,
"logps": -196.80099487304688,
"loss": 0.0957,
"objective": 0.09414150565862656,
"ranking_idealized": 0.9083333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.8833333253860474,
"regularize": 0.09414150565862656,
"step": 520
},
{
"dpo_loss": 0.5430293679237366,
"epoch": 2.975909305621162,
"grad_norm": 5.566335571750069,
"learning_rate": 2.0952075638923656e-06,
"logits": -2.975144863128662,
"logps": -196.40476989746094,
"loss": 0.0946,
"objective": 0.10850825160741806,
"ranking_idealized": 0.8999999761581421,
"ranking_idealized_expo": 0.5833333134651184,
"ranking_simple": 0.8666666746139526,
"regularize": 0.10850825160741806,
"step": 525
},
{
"dpo_loss": 0.5192977786064148,
"epoch": 3.0042512990080303,
"grad_norm": 5.207616334659725,
"learning_rate": 2.046361447106244e-06,
"logits": -2.9917781352996826,
"logps": -199.7664031982422,
"loss": 0.099,
"objective": 0.09743621945381165,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.925000011920929,
"regularize": 0.09743621200323105,
"step": 530
},
{
"dpo_loss": 0.5329768061637878,
"epoch": 3.0325932923948984,
"grad_norm": 5.005538343407122,
"learning_rate": 1.997693767401503e-06,
"logits": -3.075000286102295,
"logps": -204.2377166748047,
"loss": 0.0882,
"objective": 0.08230598270893097,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.9083333611488342,
"regularize": 0.08230597525835037,
"step": 535
},
{
"dpo_loss": 0.538392961025238,
"epoch": 3.0609352857817664,
"grad_norm": 4.866997353798794,
"learning_rate": 1.9492236680336486e-06,
"logits": -3.1421122550964355,
"logps": -189.7461700439453,
"loss": 0.0842,
"objective": 0.07560276240110397,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.9083333611488342,
"regularize": 0.07560275495052338,
"step": 540
},
{
"dpo_loss": 0.5382309556007385,
"epoch": 3.089277279168635,
"grad_norm": 5.075388727380349,
"learning_rate": 1.9009702145406728e-06,
"logits": -3.102593421936035,
"logps": -204.09027099609375,
"loss": 0.0817,
"objective": 0.08249451220035553,
"ranking_idealized": 0.9041666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8833333253860474,
"regularize": 0.08249450474977493,
"step": 545
},
{
"dpo_loss": 0.5309434533119202,
"epoch": 3.117619272555503,
"grad_norm": 4.919742025901051,
"learning_rate": 1.852952387243698e-06,
"logits": -2.887840747833252,
"logps": -203.031982421875,
"loss": 0.0891,
"objective": 0.0902470126748085,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.9375,
"regularize": 0.0902470126748085,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.6777102947235107,
"eval_logits": -3.195338249206543,
"eval_logps": -196.1659393310547,
"eval_loss": 0.39561545848846436,
"eval_objective": 0.39604148268699646,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5609503984451294,
"eval_regularize": 0.39604148268699646,
"eval_runtime": 259.2511,
"eval_samples_per_second": 22.334,
"eval_steps_per_second": 0.933,
"step": 550
},
{
"dpo_loss": 0.5161585211753845,
"epoch": 3.1459612659423715,
"grad_norm": 5.02329657218416,
"learning_rate": 1.8051890737811395e-06,
"logits": -3.032655954360962,
"logps": -205.76190185546875,
"loss": 0.0842,
"objective": 0.08457961678504944,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.949999988079071,
"regularize": 0.08457960933446884,
"step": 555
},
{
"dpo_loss": 0.5269332528114319,
"epoch": 3.1743032593292395,
"grad_norm": 5.022767770754425,
"learning_rate": 1.7576990616793139e-06,
"logits": -3.001573324203491,
"logps": -205.6256561279297,
"loss": 0.0777,
"objective": 0.07936005294322968,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.9041666388511658,
"regularize": 0.07936005294322968,
"step": 560
},
{
"dpo_loss": 0.5309363603591919,
"epoch": 3.2026452527161076,
"grad_norm": 5.117713576028772,
"learning_rate": 1.7105010309624381e-06,
"logits": -3.0092759132385254,
"logps": -198.69540405273438,
"loss": 0.0791,
"objective": 0.07759826630353928,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.8999999761581421,
"regularize": 0.07759825885295868,
"step": 565
},
{
"dpo_loss": 0.5266720652580261,
"epoch": 3.230987246102976,
"grad_norm": 4.836758469100523,
"learning_rate": 1.6636135468049122e-06,
"logits": -2.9470977783203125,
"logps": -202.33779907226562,
"loss": 0.0828,
"objective": 0.08297502994537354,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.9041666388511658,
"regularize": 0.08297502249479294,
"step": 570
},
{
"dpo_loss": 0.5284319519996643,
"epoch": 3.259329239489844,
"grad_norm": 5.27684795086492,
"learning_rate": 1.617055052228768e-06,
"logits": -3.067121744155884,
"logps": -201.16802978515625,
"loss": 0.0794,
"objective": 0.08327650278806686,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.8999999761581421,
"regularize": 0.08327650278806686,
"step": 575
},
{
"dpo_loss": 0.5467706918716431,
"epoch": 3.287671232876712,
"grad_norm": 4.999181466831561,
"learning_rate": 1.5708438608491816e-06,
"logits": -3.0922207832336426,
"logps": -200.73341369628906,
"loss": 0.0809,
"objective": 0.08547008782625198,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.887499988079071,
"regularize": 0.08547007292509079,
"step": 580
},
{
"dpo_loss": 0.5353319644927979,
"epoch": 3.3160132262635806,
"grad_norm": 4.7454466484307485,
"learning_rate": 1.524998149670871e-06,
"logits": -3.148766040802002,
"logps": -199.28677368164062,
"loss": 0.0767,
"objective": 0.06919746100902557,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.887499988079071,
"regularize": 0.06919745355844498,
"step": 585
},
{
"dpo_loss": 0.5416805148124695,
"epoch": 3.3443552196504487,
"grad_norm": 4.994244494597657,
"learning_rate": 1.479535951938243e-06,
"logits": -3.191918134689331,
"logps": -201.75802612304688,
"loss": 0.0757,
"objective": 0.06989765167236328,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.8958333134651184,
"regularize": 0.06989765167236328,
"step": 590
},
{
"dpo_loss": 0.5297635197639465,
"epoch": 3.372697213037317,
"grad_norm": 5.147488031975634,
"learning_rate": 1.43447515004208e-06,
"logits": -3.0706212520599365,
"logps": -200.92311096191406,
"loss": 0.0775,
"objective": 0.0749397724866867,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.9166666865348816,
"regularize": 0.0749397724866867,
"step": 595
},
{
"dpo_loss": 0.511430561542511,
"epoch": 3.4010392064241852,
"grad_norm": 4.864631411349059,
"learning_rate": 1.3898334684855647e-06,
"logits": -3.051577091217041,
"logps": -202.49258422851562,
"loss": 0.0749,
"objective": 0.07237013429403305,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.9375,
"regularize": 0.07237013429403305,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.6780565977096558,
"eval_logits": -3.1966371536254883,
"eval_logps": -194.1237335205078,
"eval_loss": 0.3962407112121582,
"eval_objective": 0.39730900526046753,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5743801593780518,
"eval_regularize": 0.39730900526046753,
"eval_runtime": 258.6121,
"eval_samples_per_second": 22.389,
"eval_steps_per_second": 0.936,
"step": 600
},
{
"dpo_loss": 0.5354328155517578,
"epoch": 3.4293811998110533,
"grad_norm": 5.220508290696569,
"learning_rate": 1.3456284669124159e-06,
"logits": -3.0896830558776855,
"logps": -204.61468505859375,
"loss": 0.0733,
"objective": 0.06684383749961853,
"ranking_idealized": 0.9166666865348816,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.9083333611488342,
"regularize": 0.06684383004903793,
"step": 605
},
{
"dpo_loss": 0.5366904139518738,
"epoch": 3.4577231931979218,
"grad_norm": 4.948012773738948,
"learning_rate": 1.301877533199859e-06,
"logits": -3.0734212398529053,
"logps": -203.69866943359375,
"loss": 0.0729,
"objective": 0.06990881264209747,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.8958333134651184,
"regularize": 0.06990881264209747,
"step": 610
},
{
"dpo_loss": 0.5279187560081482,
"epoch": 3.48606518658479,
"grad_norm": 5.16746605562179,
"learning_rate": 1.2585978766191726e-06,
"logits": -3.0537939071655273,
"logps": -202.3527374267578,
"loss": 0.0737,
"objective": 0.07423458993434906,
"ranking_idealized": 0.9083333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.875,
"regularize": 0.07423458248376846,
"step": 615
},
{
"dpo_loss": 0.5187819004058838,
"epoch": 3.514407179971658,
"grad_norm": 4.801574670976317,
"learning_rate": 1.2158065210664848e-06,
"logits": -2.913203001022339,
"logps": -204.0981903076172,
"loss": 0.0707,
"objective": 0.06695393472909927,
"ranking_idealized": 0.8999999761581421,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8999999761581421,
"regularize": 0.06695392727851868,
"step": 620
},
{
"dpo_loss": 0.5320748686790466,
"epoch": 3.5427491733585263,
"grad_norm": 4.901983573451279,
"learning_rate": 1.1735202983664803e-06,
"logits": -3.018667459487915,
"logps": -197.7013397216797,
"loss": 0.0682,
"objective": 0.06922433525323868,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.9333333373069763,
"regularize": 0.06922433525323868,
"step": 625
},
{
"dpo_loss": 0.5438559651374817,
"epoch": 3.5710911667453944,
"grad_norm": 4.916430683667445,
"learning_rate": 1.1317558416516696e-06,
"logits": -3.063880681991577,
"logps": -198.75747680664062,
"loss": 0.0704,
"objective": 0.07144972681999207,
"ranking_idealized": 0.9125000238418579,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.9125000238418579,
"regularize": 0.07144972681999207,
"step": 630
},
{
"dpo_loss": 0.5314496159553528,
"epoch": 3.5994331601322624,
"grad_norm": 4.767769596097874,
"learning_rate": 1.0905295788197993e-06,
"logits": -3.036067247390747,
"logps": -196.23695373535156,
"loss": 0.0647,
"objective": 0.05931680276989937,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.9333333373069763,
"regularize": 0.05931679159402847,
"step": 635
},
{
"dpo_loss": 0.5109093189239502,
"epoch": 3.627775153519131,
"grad_norm": 4.841523497854697,
"learning_rate": 1.049857726072005e-06,
"logits": -3.0281994342803955,
"logps": -211.7641143798828,
"loss": 0.0648,
"objective": 0.06795307993888855,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.9125000238418579,
"regularize": 0.06795307248830795,
"step": 640
},
{
"dpo_loss": 0.5220555067062378,
"epoch": 3.656117146905999,
"grad_norm": 4.756494572923249,
"learning_rate": 1.0097562815342215e-06,
"logits": -3.0403778553009033,
"logps": -197.11727905273438,
"loss": 0.0696,
"objective": 0.07045839726924896,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.8999999761581421,
"regularize": 0.07045838981866837,
"step": 645
},
{
"dpo_loss": 0.5448586344718933,
"epoch": 3.6844591402928675,
"grad_norm": 4.78589420445802,
"learning_rate": 9.702410189643838e-07,
"logits": -3.0378682613372803,
"logps": -199.30670166015625,
"loss": 0.062,
"objective": 0.06362789124250412,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.9208333492279053,
"regularize": 0.06362788379192352,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.6778165698051453,
"eval_logits": -3.241185188293457,
"eval_logps": -195.3243865966797,
"eval_loss": 0.39558976888656616,
"eval_objective": 0.3966863453388214,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5702479481697083,
"eval_regularize": 0.3966863453388214,
"eval_runtime": 259.3281,
"eval_samples_per_second": 22.327,
"eval_steps_per_second": 0.933,
"step": 650
},
{
"dpo_loss": 0.5378040671348572,
"epoch": 3.7128011336797355,
"grad_norm": 4.991430717748734,
"learning_rate": 9.313274815478698e-07,
"logits": -3.06374454498291,
"logps": -206.94361877441406,
"loss": 0.0675,
"objective": 0.07482859492301941,
"ranking_idealized": 0.9166666865348816,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.8833333253860474,
"regularize": 0.07482858747243881,
"step": 655
},
{
"dpo_loss": 0.533437192440033,
"epoch": 3.7411431270666036,
"grad_norm": 4.914085991583788,
"learning_rate": 8.930309757836517e-07,
"logits": -3.1829257011413574,
"logps": -198.15655517578125,
"loss": 0.0645,
"objective": 0.0638025775551796,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.9458333253860474,
"regularize": 0.0638025775551796,
"step": 660
},
{
"dpo_loss": 0.5339053869247437,
"epoch": 3.769485120453472,
"grad_norm": 4.883348560718917,
"learning_rate": 8.553665654635343e-07,
"logits": -3.0380542278289795,
"logps": -192.95997619628906,
"loss": 0.0651,
"objective": 0.06292819231748581,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.9125000238418579,
"regularize": 0.06292817741632462,
"step": 665
},
{
"dpo_loss": 0.5281752943992615,
"epoch": 3.79782711384034,
"grad_norm": 5.904298344985475,
"learning_rate": 8.183490657468687e-07,
"logits": -3.175488233566284,
"logps": -201.79714965820312,
"loss": 0.0635,
"objective": 0.06745120882987976,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.612500011920929,
"ranking_simple": 0.9125000238418579,
"regularize": 0.06745120882987976,
"step": 670
},
{
"dpo_loss": 0.535234808921814,
"epoch": 3.826169107227208,
"grad_norm": 4.6497906673921685,
"learning_rate": 7.819930373330669e-07,
"logits": -3.079956531524658,
"logps": -195.46868896484375,
"loss": 0.0606,
"objective": 0.05796652287244797,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.9041666388511658,
"regularize": 0.05796651914715767,
"step": 675
},
{
"dpo_loss": 0.5492002964019775,
"epoch": 3.8545111006140766,
"grad_norm": 5.184144239252589,
"learning_rate": 7.463127807341966e-07,
"logits": -3.021759033203125,
"logps": -195.5998992919922,
"loss": 0.0607,
"objective": 0.0610785037279129,
"ranking_idealized": 0.9083333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.862500011920929,
"regularize": 0.06107849255204201,
"step": 680
},
{
"dpo_loss": 0.5241864323616028,
"epoch": 3.8828530940009447,
"grad_norm": 5.005865678419639,
"learning_rate": 7.113223306499336e-07,
"logits": -3.1358683109283447,
"logps": -201.37371826171875,
"loss": 0.0629,
"objective": 0.06049242988228798,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.9125000238418579,
"regularize": 0.06049241125583649,
"step": 685
},
{
"dpo_loss": 0.5277370810508728,
"epoch": 3.9111950873878127,
"grad_norm": 4.853091570155952,
"learning_rate": 6.770354504470575e-07,
"logits": -3.0913193225860596,
"logps": -194.60806274414062,
"loss": 0.0576,
"objective": 0.05857709422707558,
"ranking_idealized": 0.9541666507720947,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.9333333373069763,
"regularize": 0.05857709422707558,
"step": 690
},
{
"dpo_loss": 0.5350156426429749,
"epoch": 3.9395370807746812,
"grad_norm": 4.643145060858906,
"learning_rate": 6.434656267456843e-07,
"logits": -3.007568836212158,
"logps": -196.4861297607422,
"loss": 0.062,
"objective": 0.06293628364801407,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.9166666865348816,
"regularize": 0.06293627619743347,
"step": 695
},
{
"dpo_loss": 0.537192165851593,
"epoch": 3.9678790741615493,
"grad_norm": 4.752148657093376,
"learning_rate": 6.106260641143547e-07,
"logits": -3.088932991027832,
"logps": -200.46910095214844,
"loss": 0.0583,
"objective": 0.06041649729013443,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.9375,
"regularize": 0.06041649356484413,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.677208423614502,
"eval_logits": -3.2432026863098145,
"eval_logps": -196.44686889648438,
"eval_loss": 0.3955562114715576,
"eval_objective": 0.3960891366004944,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.5640496015548706,
"eval_regularize": 0.3960891366004944,
"eval_runtime": 258.7136,
"eval_samples_per_second": 22.38,
"eval_steps_per_second": 0.935,
"step": 700
},
{
"dpo_loss": 0.5246204733848572,
"epoch": 3.9962210675484178,
"grad_norm": 4.771032845540052,
"learning_rate": 5.785296798760601e-07,
"logits": -3.013643980026245,
"logps": -202.21218872070312,
"loss": 0.0575,
"objective": 0.04958561435341835,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.9208333492279053,
"regularize": 0.049585599452257156,
"step": 705
},
{
"dpo_loss": 0.5198561549186707,
"epoch": 4.024563060935286,
"grad_norm": 4.697507600765225,
"learning_rate": 5.471890990272666e-07,
"logits": -3.1067426204681396,
"logps": -207.82223510742188,
"loss": 0.0497,
"objective": 0.054764509201049805,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.9166666865348816,
"regularize": 0.05476450175046921,
"step": 710
},
{
"dpo_loss": 0.5302870869636536,
"epoch": 4.052905054322154,
"grad_norm": 4.631394448060559,
"learning_rate": 5.166166492719124e-07,
"logits": -3.0587379932403564,
"logps": -204.6709442138672,
"loss": 0.0462,
"objective": 0.044012073427438736,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.9125000238418579,
"regularize": 0.04401206597685814,
"step": 715
},
{
"dpo_loss": 0.5232208371162415,
"epoch": 4.081247047709022,
"grad_norm": 4.929724097820593,
"learning_rate": 4.868243561723535e-07,
"logits": -2.9354002475738525,
"logps": -205.88121032714844,
"loss": 0.0468,
"objective": 0.053409043699502945,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.925000011920929,
"regularize": 0.05340903252363205,
"step": 720
},
{
"dpo_loss": 0.5423314571380615,
"epoch": 4.109589041095891,
"grad_norm": 4.739841142104876,
"learning_rate": 4.57823938419153e-07,
"logits": -3.0645394325256348,
"logps": -200.61724853515625,
"loss": 0.0479,
"objective": 0.04531220719218254,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.9208333492279053,
"regularize": 0.045312199741601944,
"step": 725
},
{
"dpo_loss": 0.5259865522384644,
"epoch": 4.137931034482759,
"grad_norm": 4.6973617362423665,
"learning_rate": 4.2962680322157335e-07,
"logits": -3.1625542640686035,
"logps": -201.07965087890625,
"loss": 0.0486,
"objective": 0.049515120685100555,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.887499988079071,
"regularize": 0.04951511323451996,
"step": 730
},
{
"dpo_loss": 0.5347721576690674,
"epoch": 4.166273027869627,
"grad_norm": 4.718934829997983,
"learning_rate": 4.0224404182059443e-07,
"logits": -3.0613696575164795,
"logps": -204.30772399902344,
"loss": 0.0439,
"objective": 0.04426734894514084,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.949999988079071,
"regularize": 0.04426734521985054,
"step": 735
},
{
"dpo_loss": 0.5107501149177551,
"epoch": 4.194615021256495,
"grad_norm": 4.7379442831945635,
"learning_rate": 3.756864251262143e-07,
"logits": -3.04003643989563,
"logps": -202.8253631591797,
"loss": 0.0459,
"objective": 0.04314772039651871,
"ranking_idealized": 0.9583333134651184,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.949999988079071,
"regularize": 0.043147701770067215,
"step": 740
},
{
"dpo_loss": 0.5249863266944885,
"epoch": 4.222957014643363,
"grad_norm": 4.71988199553104,
"learning_rate": 3.499643994807486e-07,
"logits": -3.1296160221099854,
"logps": -198.77182006835938,
"loss": 0.046,
"objective": 0.045759402215480804,
"ranking_idealized": 0.9166666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.8958333134651184,
"regularize": 0.04575938731431961,
"step": 745
},
{
"dpo_loss": 0.522843599319458,
"epoch": 4.251299008030231,
"grad_norm": 4.672227955095767,
"learning_rate": 3.250880825498026e-07,
"logits": -3.2786660194396973,
"logps": -199.6768035888672,
"loss": 0.0451,
"objective": 0.049515463411808014,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.9166666865348816,
"regularize": 0.04951544478535652,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.6771067380905151,
"eval_logits": -3.266589403152466,
"eval_logps": -195.43978881835938,
"eval_loss": 0.39519038796424866,
"eval_objective": 0.3954727351665497,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.567148745059967,
"eval_regularize": 0.3954727351665497,
"eval_runtime": 258.4936,
"eval_samples_per_second": 22.399,
"eval_steps_per_second": 0.936,
"step": 750
},
{
"dpo_loss": 0.528011679649353,
"epoch": 4.2796410014171,
"grad_norm": 5.004941031227222,
"learning_rate": 3.0106725934252095e-07,
"logits": -3.2007675170898438,
"logps": -196.980224609375,
"loss": 0.0469,
"objective": 0.052510153502225876,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.9166666865348816,
"regularize": 0.05251014232635498,
"step": 755
},
{
"dpo_loss": 0.5268819332122803,
"epoch": 4.307982994803968,
"grad_norm": 4.8743952598559135,
"learning_rate": 2.779113783626916e-07,
"logits": -3.166001796722412,
"logps": -206.85211181640625,
"loss": 0.0436,
"objective": 0.04319094866514206,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5874999761581421,
"ranking_simple": 0.9291666746139526,
"regularize": 0.04319094493985176,
"step": 760
},
{
"dpo_loss": 0.5119529962539673,
"epoch": 4.336324988190836,
"grad_norm": 4.7936404352453845,
"learning_rate": 2.5562954789221164e-07,
"logits": -3.224353790283203,
"logps": -204.93324279785156,
"loss": 0.0447,
"objective": 0.04502396285533905,
"ranking_idealized": 0.9624999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.9583333134651184,
"regularize": 0.04502394422888756,
"step": 765
},
{
"dpo_loss": 0.5143262147903442,
"epoch": 4.364666981577704,
"grad_norm": 4.845346036030975,
"learning_rate": 2.3423053240837518e-07,
"logits": -3.086646318435669,
"logps": -200.40354919433594,
"loss": 0.0447,
"objective": 0.04372342303395271,
"ranking_idealized": 0.9666666388511658,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.9708333611488342,
"regularize": 0.043723396956920624,
"step": 770
},
{
"dpo_loss": 0.5219811797142029,
"epoch": 4.393008974964572,
"grad_norm": 4.882318635829277,
"learning_rate": 2.137227491364016e-07,
"logits": -3.1227707862854004,
"logps": -202.45298767089844,
"loss": 0.0431,
"objective": 0.042134013026952744,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.9125000238418579,
"regularize": 0.04213400185108185,
"step": 775
},
{
"dpo_loss": 0.5324522852897644,
"epoch": 4.42135096835144,
"grad_norm": 5.178173799388018,
"learning_rate": 1.941142647385469e-07,
"logits": -3.1812171936035156,
"logps": -196.50355529785156,
"loss": 0.0404,
"objective": 0.039291638880968094,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.925000011920929,
"regularize": 0.0392916202545166,
"step": 780
},
{
"dpo_loss": 0.543372392654419,
"epoch": 4.449692961738309,
"grad_norm": 4.563712486054605,
"learning_rate": 1.7541279214111277e-07,
"logits": -3.204663038253784,
"logps": -198.68594360351562,
"loss": 0.0495,
"objective": 0.05586666613817215,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.9291666746139526,
"regularize": 0.05586665868759155,
"step": 785
},
{
"dpo_loss": 0.5179670453071594,
"epoch": 4.478034955125177,
"grad_norm": 4.962116136464137,
"learning_rate": 1.5762568750059604e-07,
"logits": -3.1283469200134277,
"logps": -203.68194580078125,
"loss": 0.0426,
"objective": 0.04570373520255089,
"ranking_idealized": 0.949999988079071,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.9541666507720947,
"regularize": 0.04570373147726059,
"step": 790
},
{
"dpo_loss": 0.525623083114624,
"epoch": 4.506376948512045,
"grad_norm": 4.808999753603184,
"learning_rate": 1.4075994731016895e-07,
"logits": -3.018510580062866,
"logps": -205.88327026367188,
"loss": 0.0435,
"objective": 0.036049842834472656,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.925000011920929,
"regularize": 0.03604983165860176,
"step": 795
},
{
"dpo_loss": 0.5233331918716431,
"epoch": 4.534718941898913,
"grad_norm": 4.734460423236949,
"learning_rate": 1.2482220564763669e-07,
"logits": -3.0628395080566406,
"logps": -201.49278259277344,
"loss": 0.0438,
"objective": 0.04488484933972359,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.925000011920929,
"regularize": 0.04488483443856239,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.6771342158317566,
"eval_logits": -3.2692906856536865,
"eval_logps": -195.2318878173828,
"eval_loss": 0.39515408873558044,
"eval_objective": 0.39557480812072754,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.56611567735672,
"eval_regularize": 0.39557480812072754,
"eval_runtime": 258.9479,
"eval_samples_per_second": 22.36,
"eval_steps_per_second": 0.935,
"step": 800
},
{
"dpo_loss": 0.5237378478050232,
"epoch": 4.563060935285781,
"grad_norm": 4.695279873765779,
"learning_rate": 1.0981873156594381e-07,
"logits": -3.0811893939971924,
"logps": -198.17877197265625,
"loss": 0.0405,
"objective": 0.04346688091754913,
"ranking_idealized": 0.8999999761581421,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.8958333134651184,
"regularize": 0.043466873466968536,
"step": 805
},
{
"dpo_loss": 0.5305168032646179,
"epoch": 4.59140292867265,
"grad_norm": 4.641921422533774,
"learning_rate": 9.575542662726756e-08,
"logits": -3.176128387451172,
"logps": -197.74447631835938,
"loss": 0.0422,
"objective": 0.036040760576725006,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.9166666865348816,
"regularize": 0.03604074567556381,
"step": 810
},
{
"dpo_loss": 0.5344857573509216,
"epoch": 4.619744922059518,
"grad_norm": 4.571244888107197,
"learning_rate": 8.26378225816582e-08,
"logits": -3.02875018119812,
"logps": -193.68545532226562,
"loss": 0.0411,
"objective": 0.03461510315537453,
"ranking_idealized": 0.9416666626930237,
"ranking_idealized_expo": 0.6333333253860474,
"ranking_simple": 0.925000011920929,
"regularize": 0.03461508825421333,
"step": 815
},
{
"dpo_loss": 0.5378891825675964,
"epoch": 4.648086915446386,
"grad_norm": 4.696564500526644,
"learning_rate": 7.047107919114588e-08,
"logits": -3.148911952972412,
"logps": -204.34703063964844,
"loss": 0.0412,
"objective": 0.040915556252002716,
"ranking_idealized": 0.9333333373069763,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.925000011920929,
"regularize": 0.04091554507613182,
"step": 820
},
{
"dpo_loss": 0.5378555059432983,
"epoch": 4.6764289088332545,
"grad_norm": 4.6443623208845795,
"learning_rate": 5.92599822001666e-08,
"logits": -3.0313339233398438,
"logps": -200.1685028076172,
"loss": 0.0388,
"objective": 0.034796856343746185,
"ranking_idealized": 0.9125000238418579,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.8999999761581421,
"regularize": 0.03479684516787529,
"step": 825
},
{
"dpo_loss": 0.5235874056816101,
"epoch": 4.7047709022201225,
"grad_norm": 4.667685116333195,
"learning_rate": 4.9008941453107527e-08,
"logits": -3.2229866981506348,
"logps": -199.17506408691406,
"loss": 0.043,
"objective": 0.04358634725213051,
"ranking_idealized": 0.925000011920929,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.9166666865348816,
"regularize": 0.043586332350969315,
"step": 830
},
{
"dpo_loss": 0.5147577524185181,
"epoch": 4.733112895606991,
"grad_norm": 4.858854755852941,
"learning_rate": 3.972198915970976e-08,
"logits": -3.1338717937469482,
"logps": -205.56285095214844,
"loss": 0.0391,
"objective": 0.04283083602786064,
"ranking_idealized": 0.9291666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.8999999761581421,
"regularize": 0.04283082112669945,
"step": 835
},
{
"dpo_loss": 0.5037484765052795,
"epoch": 4.7614548889938595,
"grad_norm": 4.877052385921156,
"learning_rate": 3.1402778309014284e-08,
"logits": -3.141592502593994,
"logps": -206.25045776367188,
"loss": 0.0442,
"objective": 0.04478234797716141,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.9375,
"regularize": 0.04478234425187111,
"step": 840
},
{
"dpo_loss": 0.5356060266494751,
"epoch": 4.7897968823807275,
"grad_norm": 4.719985877621544,
"learning_rate": 2.4054581232470785e-08,
"logits": -3.155550241470337,
"logps": -196.71856689453125,
"loss": 0.0404,
"objective": 0.037482328712940216,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.9166666865348816,
"regularize": 0.03748232498764992,
"step": 845
},
{
"dpo_loss": 0.5255146026611328,
"epoch": 4.818138875767596,
"grad_norm": 5.213474190658553,
"learning_rate": 1.768028831677926e-08,
"logits": -3.0956904888153076,
"logps": -199.34555053710938,
"loss": 0.0408,
"objective": 0.03515857085585594,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.9125000238418579,
"regularize": 0.03515855222940445,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.6770739555358887,
"eval_logits": -3.2704193592071533,
"eval_logps": -195.50953674316406,
"eval_loss": 0.3951335549354553,
"eval_objective": 0.395561158657074,
"eval_ranking_idealized": 0.9194214940071106,
"eval_ranking_idealized_expo": 0.5309917330741882,
"eval_ranking_simple": 0.56611567735672,
"eval_regularize": 0.395561158657074,
"eval_runtime": 259.1953,
"eval_samples_per_second": 22.338,
"eval_steps_per_second": 0.934,
"step": 850
},
{
"dpo_loss": 0.5365945100784302,
"epoch": 4.846480869154464,
"grad_norm": 4.6357618577755,
"learning_rate": 1.2282406866966078e-08,
"logits": -3.0836923122406006,
"logps": -202.77923583984375,
"loss": 0.0369,
"objective": 0.03312551975250244,
"ranking_idealized": 0.9583333134651184,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.9375,
"regularize": 0.033125489950180054,
"step": 855
},
{
"dpo_loss": 0.5177373290061951,
"epoch": 4.874822862541333,
"grad_norm": 5.087309484062101,
"learning_rate": 7.863060120144316e-09,
"logits": -3.0513181686401367,
"logps": -197.80010986328125,
"loss": 0.0392,
"objective": 0.04462633281946182,
"ranking_idealized": 0.9125000238418579,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.9208333492279053,
"regularize": 0.04462629556655884,
"step": 860
},
{
"dpo_loss": 0.5258888006210327,
"epoch": 4.903164855928201,
"grad_norm": 4.542109406021276,
"learning_rate": 4.423986410346526e-09,
"logits": -3.1244866847991943,
"logps": -196.3852081298828,
"loss": 0.0396,
"objective": 0.05519362911581993,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.9375,
"regularize": 0.055193621665239334,
"step": 865
},
{
"dpo_loss": 0.5206807851791382,
"epoch": 4.931506849315069,
"grad_norm": 4.732937484587474,
"learning_rate": 1.9665384847583622e-09,
"logits": -3.1762211322784424,
"logps": -202.62925720214844,
"loss": 0.0378,
"objective": 0.0369645431637764,
"ranking_idealized": 0.9208333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.9166666865348816,
"regularize": 0.0369645357131958,
"step": 870
},
{
"dpo_loss": 0.5267921686172485,
"epoch": 4.959848842701937,
"grad_norm": 4.9576694103498475,
"learning_rate": 4.916829716183901e-10,
"logits": -3.104861259460449,
"logps": -203.89500427246094,
"loss": 0.041,
"objective": 0.03402137756347656,
"ranking_idealized": 0.9375,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.9208333492279053,
"regularize": 0.03402136638760567,
"step": 875
},
{
"dpo_loss": 0.5229869484901428,
"epoch": 4.988190836088805,
"grad_norm": 4.696769247024194,
"learning_rate": 0.0,
"logits": -3.1990513801574707,
"logps": -196.47311401367188,
"loss": 0.0352,
"objective": 0.0322914645075798,
"ranking_idealized": 0.9458333253860474,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.9458333253860474,
"regularize": 0.03229145333170891,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.1496292933313684,
"train_runtime": 35141.4223,
"train_samples_per_second": 7.228,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}