hZzy's picture
Model save
ef98ea9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 13.413600039235007,
"learning_rate": 5.681818181818181e-09,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6931512951850891,
"epoch": 0.02834199338686821,
"grad_norm": 13.318320815280419,
"learning_rate": 2.8409090909090908e-08,
"logits": -1.3678321838378906,
"logps": -84.44427490234375,
"loss": 0.4131,
"objective": 0.3755509555339813,
"ranking_idealized": 0.6510416865348816,
"ranking_idealized_expo": 0.5572916865348816,
"ranking_simple": 0.546875,
"regularize": 0.3755509555339813,
"step": 5
},
{
"dpo_loss": 0.6927531361579895,
"epoch": 0.05668398677373642,
"grad_norm": 13.050623089340824,
"learning_rate": 5.6818181818181815e-08,
"logits": -1.4463988542556763,
"logps": -83.39988708496094,
"loss": 0.4176,
"objective": 0.4423220753669739,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.512499988079071,
"regularize": 0.4423220753669739,
"step": 10
},
{
"dpo_loss": 0.6927918195724487,
"epoch": 0.08502598016060463,
"grad_norm": 12.549385306441062,
"learning_rate": 8.522727272727271e-08,
"logits": -1.4107797145843506,
"logps": -83.50421905517578,
"loss": 0.4254,
"objective": 0.41179904341697693,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.41179904341697693,
"step": 15
},
{
"dpo_loss": 0.6924694776535034,
"epoch": 0.11336797354747284,
"grad_norm": 13.269620119946596,
"learning_rate": 1.1363636363636363e-07,
"logits": -1.4003115892410278,
"logps": -84.06736755371094,
"loss": 0.4149,
"objective": 0.40317121148109436,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5375000238418579,
"regularize": 0.40317121148109436,
"step": 20
},
{
"dpo_loss": 0.6906281113624573,
"epoch": 0.14170996693434104,
"grad_norm": 12.65234373247132,
"learning_rate": 1.4204545454545455e-07,
"logits": -1.4490704536437988,
"logps": -83.72380065917969,
"loss": 0.412,
"objective": 0.4304184317588806,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5,
"regularize": 0.4304184317588806,
"step": 25
},
{
"dpo_loss": 0.6906370520591736,
"epoch": 0.17005196032120926,
"grad_norm": 13.419812147505471,
"learning_rate": 1.7045454545454543e-07,
"logits": -1.4248003959655762,
"logps": -84.09757232666016,
"loss": 0.4126,
"objective": 0.41593801975250244,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5333333611488342,
"regularize": 0.41593801975250244,
"step": 30
},
{
"dpo_loss": 0.6881809234619141,
"epoch": 0.19839395370807747,
"grad_norm": 13.431894879328123,
"learning_rate": 1.9886363636363636e-07,
"logits": -1.398374319076538,
"logps": -82.60546112060547,
"loss": 0.4095,
"objective": 0.3929609954357147,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5083333253860474,
"regularize": 0.3929609954357147,
"step": 35
},
{
"dpo_loss": 0.6868489384651184,
"epoch": 0.22673594709494568,
"grad_norm": 15.0250838416837,
"learning_rate": 2.2727272727272726e-07,
"logits": -1.3904410600662231,
"logps": -82.84651947021484,
"loss": 0.42,
"objective": 0.43919187784194946,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5166666507720947,
"regularize": 0.43919187784194946,
"step": 40
},
{
"dpo_loss": 0.6844364404678345,
"epoch": 0.25507794048181387,
"grad_norm": 13.128806663839857,
"learning_rate": 2.5568181818181816e-07,
"logits": -1.5230154991149902,
"logps": -84.21646118164062,
"loss": 0.4194,
"objective": 0.4717731177806854,
"ranking_idealized": 0.7208333611488342,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 0.4717731177806854,
"step": 45
},
{
"dpo_loss": 0.6831071376800537,
"epoch": 0.2834199338686821,
"grad_norm": 12.39410793472882,
"learning_rate": 2.840909090909091e-07,
"logits": -1.431780219078064,
"logps": -82.2941665649414,
"loss": 0.4122,
"objective": 0.3948862850666046,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.44999998807907104,
"regularize": 0.3948862850666046,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6914567947387695,
"eval_logits": -1.4614633321762085,
"eval_logps": -90.56139373779297,
"eval_loss": 0.4102250635623932,
"eval_objective": 0.40930914878845215,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5123966932296753,
"eval_regularize": 0.40930914878845215,
"eval_runtime": 260.1383,
"eval_samples_per_second": 22.257,
"eval_steps_per_second": 0.93,
"step": 50
},
{
"dpo_loss": 0.6807647943496704,
"epoch": 0.3117619272555503,
"grad_norm": 13.979478083508853,
"learning_rate": 3.1249999999999997e-07,
"logits": -1.4781759977340698,
"logps": -84.0101089477539,
"loss": 0.4052,
"objective": 0.4063163101673126,
"ranking_idealized": 0.7124999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.4063163101673126,
"step": 55
},
{
"dpo_loss": 0.6824926137924194,
"epoch": 0.3401039206424185,
"grad_norm": 13.484676530515722,
"learning_rate": 3.4090909090909085e-07,
"logits": -1.4679373502731323,
"logps": -83.09486389160156,
"loss": 0.3992,
"objective": 0.387731671333313,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 0.387731671333313,
"step": 60
},
{
"dpo_loss": 0.6788213849067688,
"epoch": 0.3684459140292867,
"grad_norm": 13.535493104004898,
"learning_rate": 3.693181818181818e-07,
"logits": -1.4250341653823853,
"logps": -83.52283477783203,
"loss": 0.3842,
"objective": 0.3719988465309143,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5083333253860474,
"regularize": 0.3719988465309143,
"step": 65
},
{
"dpo_loss": 0.6763210296630859,
"epoch": 0.39678790741615494,
"grad_norm": 13.25897931133664,
"learning_rate": 3.977272727272727e-07,
"logits": -1.5077797174453735,
"logps": -85.39080047607422,
"loss": 0.3855,
"objective": 0.42043933272361755,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5291666388511658,
"regularize": 0.42043933272361755,
"step": 70
},
{
"dpo_loss": 0.678033709526062,
"epoch": 0.42512990080302315,
"grad_norm": 14.035157652400327,
"learning_rate": 4.2613636363636364e-07,
"logits": -1.5349814891815186,
"logps": -86.0143051147461,
"loss": 0.3945,
"objective": 0.41438591480255127,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5666666626930237,
"regularize": 0.41438591480255127,
"step": 75
},
{
"dpo_loss": 0.6748775243759155,
"epoch": 0.45347189418989137,
"grad_norm": 13.539091864104346,
"learning_rate": 4.545454545454545e-07,
"logits": -1.5832253694534302,
"logps": -85.59701538085938,
"loss": 0.3789,
"objective": 0.37422579526901245,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5333333611488342,
"regularize": 0.37422579526901245,
"step": 80
},
{
"dpo_loss": 0.6750870943069458,
"epoch": 0.4818138875767596,
"grad_norm": 12.78905385712093,
"learning_rate": 4.829545454545455e-07,
"logits": -1.5551499128341675,
"logps": -84.24475860595703,
"loss": 0.374,
"objective": 0.39821094274520874,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5375000238418579,
"regularize": 0.39821094274520874,
"step": 85
},
{
"dpo_loss": 0.6703960299491882,
"epoch": 0.5101558809636277,
"grad_norm": 14.26040681218726,
"learning_rate": 4.999921328558332e-07,
"logits": -1.37662935256958,
"logps": -86.21568298339844,
"loss": 0.3761,
"objective": 0.3837045729160309,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.4833333194255829,
"regularize": 0.3837045729160309,
"step": 90
},
{
"dpo_loss": 0.658724844455719,
"epoch": 0.538497874350496,
"grad_norm": 14.003866252787525,
"learning_rate": 4.999036331701828e-07,
"logits": -1.4695987701416016,
"logps": -85.49458312988281,
"loss": 0.3642,
"objective": 0.39033612608909607,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.5666666626930237,
"regularize": 0.39033612608909607,
"step": 95
},
{
"dpo_loss": 0.6546652317047119,
"epoch": 0.5668398677373642,
"grad_norm": 13.876424116810778,
"learning_rate": 4.99716834795752e-07,
"logits": -1.5616024732589722,
"logps": -86.23612213134766,
"loss": 0.374,
"objective": 0.4149954915046692,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5458333492279053,
"regularize": 0.4149954915046692,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.6882808208465576,
"eval_logits": -1.5521211624145508,
"eval_logps": -92.03614807128906,
"eval_loss": 0.4072900712490082,
"eval_objective": 0.40819329023361206,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5144628286361694,
"eval_regularize": 0.40819329023361206,
"eval_runtime": 258.9254,
"eval_samples_per_second": 22.362,
"eval_steps_per_second": 0.935,
"step": 100
},
{
"dpo_loss": 0.6509627103805542,
"epoch": 0.5951818611242324,
"grad_norm": 15.031011715031442,
"learning_rate": 4.994318112090048e-07,
"logits": -1.4410721063613892,
"logps": -85.85182189941406,
"loss": 0.3711,
"objective": 0.3764886260032654,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.574999988079071,
"regularize": 0.3764886260032654,
"step": 105
},
{
"dpo_loss": 0.6576470136642456,
"epoch": 0.6235238545111006,
"grad_norm": 16.068373019347053,
"learning_rate": 4.990486745229364e-07,
"logits": -1.6439845561981201,
"logps": -84.1036376953125,
"loss": 0.3694,
"objective": 0.39763620495796204,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.48750001192092896,
"regularize": 0.39763620495796204,
"step": 110
},
{
"dpo_loss": 0.6543448567390442,
"epoch": 0.6518658478979689,
"grad_norm": 15.729444594038945,
"learning_rate": 4.985675754429743e-07,
"logits": -1.6000815629959106,
"logps": -83.94436645507812,
"loss": 0.3477,
"objective": 0.3455929458141327,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.3455929458141327,
"step": 115
},
{
"dpo_loss": 0.6514815092086792,
"epoch": 0.680207841284837,
"grad_norm": 14.396427732147952,
"learning_rate": 4.979887032076988e-07,
"logits": -1.5459378957748413,
"logps": -85.23513793945312,
"loss": 0.3534,
"objective": 0.3168259561061859,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3168259561061859,
"step": 120
},
{
"dpo_loss": 0.6492612957954407,
"epoch": 0.7085498346717053,
"grad_norm": 16.706445645247783,
"learning_rate": 4.973122855144065e-07,
"logits": -1.5174397230148315,
"logps": -86.0051040649414,
"loss": 0.3448,
"objective": 0.3529473543167114,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.5708333253860474,
"regularize": 0.3529473543167114,
"step": 125
},
{
"dpo_loss": 0.65309077501297,
"epoch": 0.7368918280585735,
"grad_norm": 15.417556754357976,
"learning_rate": 4.965385884295466e-07,
"logits": -1.664696455001831,
"logps": -85.23889923095703,
"loss": 0.3464,
"objective": 0.33712950348854065,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.4958333373069763,
"regularize": 0.33712950348854065,
"step": 130
},
{
"dpo_loss": 0.6549941301345825,
"epoch": 0.7652338214454416,
"grad_norm": 13.59480500578719,
"learning_rate": 4.956679162840645e-07,
"logits": -1.626897931098938,
"logps": -86.90068817138672,
"loss": 0.3309,
"objective": 0.34302666783332825,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5458333492279053,
"regularize": 0.34302666783332825,
"step": 135
},
{
"dpo_loss": 0.6500818729400635,
"epoch": 0.7935758148323099,
"grad_norm": 14.79485288903614,
"learning_rate": 4.947006115536947e-07,
"logits": -1.523794412612915,
"logps": -86.5340576171875,
"loss": 0.3244,
"objective": 0.3356337249279022,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5166666507720947,
"regularize": 0.3356337249279022,
"step": 140
},
{
"dpo_loss": 0.6436840295791626,
"epoch": 0.821917808219178,
"grad_norm": 14.29078834943314,
"learning_rate": 4.936370547242482e-07,
"logits": -1.5991618633270264,
"logps": -86.87805938720703,
"loss": 0.3315,
"objective": 0.35039833188056946,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5,
"regularize": 0.35039833188056946,
"step": 145
},
{
"dpo_loss": 0.6453251242637634,
"epoch": 0.8502598016060463,
"grad_norm": 14.662823673975787,
"learning_rate": 4.924776641419512e-07,
"logits": -1.5607432126998901,
"logps": -84.04727935791016,
"loss": 0.3231,
"objective": 0.32859519124031067,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.32859519124031067,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6880838871002197,
"eval_logits": -1.6073634624481201,
"eval_logps": -92.8072509765625,
"eval_loss": 0.40292537212371826,
"eval_objective": 0.4087039530277252,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5185950398445129,
"eval_regularize": 0.4087039530277252,
"eval_runtime": 258.55,
"eval_samples_per_second": 22.394,
"eval_steps_per_second": 0.936,
"step": 150
},
{
"dpo_loss": 0.6386777758598328,
"epoch": 0.8786017949929145,
"grad_norm": 14.231064502358223,
"learning_rate": 4.912228958488892e-07,
"logits": -1.5854390859603882,
"logps": -84.10832214355469,
"loss": 0.3257,
"objective": 0.3301841616630554,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5916666388511658,
"regularize": 0.3301841616630554,
"step": 155
},
{
"dpo_loss": 0.635101854801178,
"epoch": 0.9069437883797827,
"grad_norm": 16.2268120086952,
"learning_rate": 4.898732434036243e-07,
"logits": -1.4904930591583252,
"logps": -86.09799194335938,
"loss": 0.3107,
"objective": 0.32787373661994934,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5458333492279053,
"regularize": 0.32787373661994934,
"step": 160
},
{
"dpo_loss": 0.632634162902832,
"epoch": 0.9352857817666509,
"grad_norm": 16.041101199008867,
"learning_rate": 4.884292376870567e-07,
"logits": -1.5242409706115723,
"logps": -86.48987579345703,
"loss": 0.3212,
"objective": 0.3137226700782776,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.574999988079071,
"regularize": 0.3137226700782776,
"step": 165
},
{
"dpo_loss": 0.6288425922393799,
"epoch": 0.9636277751535192,
"grad_norm": 16.89173365453321,
"learning_rate": 4.868914466936037e-07,
"logits": -1.5360677242279053,
"logps": -86.72618103027344,
"loss": 0.3151,
"objective": 0.30297866463661194,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5708333253860474,
"regularize": 0.30297866463661194,
"step": 170
},
{
"dpo_loss": 0.6335378289222717,
"epoch": 0.9919697685403873,
"grad_norm": 15.347989877166441,
"learning_rate": 4.852604753077817e-07,
"logits": -1.4790997505187988,
"logps": -87.8569107055664,
"loss": 0.3103,
"objective": 0.29884618520736694,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6166666746139526,
"regularize": 0.29884618520736694,
"step": 175
},
{
"dpo_loss": 0.6220327615737915,
"epoch": 1.0203117619272555,
"grad_norm": 15.022798279596007,
"learning_rate": 4.835369650662767e-07,
"logits": -1.6809762716293335,
"logps": -87.00578308105469,
"loss": 0.2902,
"objective": 0.3023075461387634,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5874999761581421,
"regularize": 0.3023075461387634,
"step": 180
},
{
"dpo_loss": 0.6156979203224182,
"epoch": 1.0486537553141237,
"grad_norm": 17.673807880039096,
"learning_rate": 4.817215939055985e-07,
"logits": -1.54806387424469,
"logps": -86.16964721679688,
"loss": 0.2856,
"objective": 0.30200377106666565,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5416666865348816,
"regularize": 0.30200377106666565,
"step": 185
},
{
"dpo_loss": 0.6105552315711975,
"epoch": 1.076995748700992,
"grad_norm": 16.47130075175902,
"learning_rate": 4.798150758954164e-07,
"logits": -1.6065795421600342,
"logps": -88.57856750488281,
"loss": 0.2661,
"objective": 0.23887412250041962,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6041666865348816,
"regularize": 0.23887412250041962,
"step": 190
},
{
"dpo_loss": 0.6094806790351868,
"epoch": 1.10533774208786,
"grad_norm": 15.979183042956787,
"learning_rate": 4.778181609576831e-07,
"logits": -1.58108651638031,
"logps": -86.33049011230469,
"loss": 0.2734,
"objective": 0.23615716397762299,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.23615716397762299,
"step": 195
},
{
"dpo_loss": 0.6135362982749939,
"epoch": 1.1336797354747283,
"grad_norm": 15.241345178579065,
"learning_rate": 4.757316345716553e-07,
"logits": -1.6668376922607422,
"logps": -85.64834594726562,
"loss": 0.267,
"objective": 0.2661064565181732,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5541666746139526,
"regularize": 0.2661064565181732,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6866354942321777,
"eval_logits": -1.6423935890197754,
"eval_logps": -94.7991943359375,
"eval_loss": 0.4068562686443329,
"eval_objective": 0.41099515557289124,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5185950398445129,
"eval_regularize": 0.41099515557289124,
"eval_runtime": 258.6603,
"eval_samples_per_second": 22.385,
"eval_steps_per_second": 0.936,
"step": 200
},
{
"dpo_loss": 0.6104326844215393,
"epoch": 1.1620217288615966,
"grad_norm": 16.09148449696529,
"learning_rate": 4.735563174649278e-07,
"logits": -1.6373622417449951,
"logps": -88.22838592529297,
"loss": 0.2748,
"objective": 0.2591724991798401,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5541666746139526,
"regularize": 0.2591724991798401,
"step": 205
},
{
"dpo_loss": 0.6027090549468994,
"epoch": 1.1903637222484649,
"grad_norm": 16.78316844909737,
"learning_rate": 4.7129306529060407e-07,
"logits": -1.604967474937439,
"logps": -88.00846099853516,
"loss": 0.2647,
"objective": 0.28820380568504333,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5541666746139526,
"regularize": 0.28820380568504333,
"step": 210
},
{
"dpo_loss": 0.6120165586471558,
"epoch": 1.2187057156353331,
"grad_norm": 17.38824297135803,
"learning_rate": 4.6894276829072786e-07,
"logits": -1.577252745628357,
"logps": -88.0232925415039,
"loss": 0.2457,
"objective": 0.2474772185087204,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5625,
"regularize": 0.2474772185087204,
"step": 215
},
{
"dpo_loss": 0.6058060526847839,
"epoch": 1.2470477090222012,
"grad_norm": 16.33918535416867,
"learning_rate": 4.6650635094610966e-07,
"logits": -1.5445390939712524,
"logps": -87.7970199584961,
"loss": 0.25,
"objective": 0.23831520974636078,
"ranking_idealized": 0.7208333611488342,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.637499988079071,
"regularize": 0.23831520974636078,
"step": 220
},
{
"dpo_loss": 0.6021844744682312,
"epoch": 1.2753897024090695,
"grad_norm": 14.989005292751132,
"learning_rate": 4.639847716126854e-07,
"logits": -1.6192957162857056,
"logps": -89.04407501220703,
"loss": 0.2607,
"objective": 0.26420968770980835,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6041666865348816,
"regularize": 0.26420965790748596,
"step": 225
},
{
"dpo_loss": 0.6108235120773315,
"epoch": 1.3037316957959377,
"grad_norm": 16.245321246774985,
"learning_rate": 4.6137902214455106e-07,
"logits": -1.5698676109313965,
"logps": -89.06554412841797,
"loss": 0.2454,
"objective": 0.24457047879695892,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5541666746139526,
"regularize": 0.24457047879695892,
"step": 230
},
{
"dpo_loss": 0.59207683801651,
"epoch": 1.3320736891828058,
"grad_norm": 16.42864016636988,
"learning_rate": 4.5869012750382004e-07,
"logits": -1.6616859436035156,
"logps": -87.82197570800781,
"loss": 0.2583,
"objective": 0.26300859451293945,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6041666865348816,
"regularize": 0.26300859451293945,
"step": 235
},
{
"dpo_loss": 0.5991641283035278,
"epoch": 1.360415682569674,
"grad_norm": 17.146089761318706,
"learning_rate": 4.5591914535745817e-07,
"logits": -1.5948702096939087,
"logps": -89.31143188476562,
"loss": 0.2442,
"objective": 0.25130581855773926,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5666666626930237,
"regularize": 0.25130581855773926,
"step": 240
},
{
"dpo_loss": 0.6016849279403687,
"epoch": 1.3887576759565423,
"grad_norm": 14.997203138603757,
"learning_rate": 4.5306716566125433e-07,
"logits": -1.6367671489715576,
"logps": -88.17431640625,
"loss": 0.2399,
"objective": 0.23935823142528534,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5833333134651184,
"regularize": 0.23935823142528534,
"step": 245
},
{
"dpo_loss": 0.5918813943862915,
"epoch": 1.4170996693434104,
"grad_norm": 15.668454928081044,
"learning_rate": 4.501353102310901e-07,
"logits": -1.5877238512039185,
"logps": -87.66322326660156,
"loss": 0.2432,
"objective": 0.2531537711620331,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5625,
"regularize": 0.2531537711620331,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6876620650291443,
"eval_logits": -1.6720653772354126,
"eval_logps": -96.13894653320312,
"eval_loss": 0.4107522666454315,
"eval_objective": 0.4137335419654846,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 0.4137335419654846,
"eval_runtime": 259.3309,
"eval_samples_per_second": 22.327,
"eval_steps_per_second": 0.933,
"step": 250
},
{
"dpo_loss": 0.5952737927436829,
"epoch": 1.4454416627302786,
"grad_norm": 16.40280338029817,
"learning_rate": 4.471247323016777e-07,
"logits": -1.5863794088363647,
"logps": -89.24433898925781,
"loss": 0.2442,
"objective": 0.2290959656238556,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6291666626930237,
"regularize": 0.2290959656238556,
"step": 255
},
{
"dpo_loss": 0.5954132080078125,
"epoch": 1.473783656117147,
"grad_norm": 16.11674277744465,
"learning_rate": 4.440366160729392e-07,
"logits": -1.6588572263717651,
"logps": -89.44280242919922,
"loss": 0.2443,
"objective": 0.2354036122560501,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5625,
"regularize": 0.2354036122560501,
"step": 260
},
{
"dpo_loss": 0.5868396759033203,
"epoch": 1.5021256495040152,
"grad_norm": 16.753008834337265,
"learning_rate": 4.4087217624420585e-07,
"logits": -1.6106855869293213,
"logps": -88.14371490478516,
"loss": 0.239,
"objective": 0.24956756830215454,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5958333611488342,
"regularize": 0.24956756830215454,
"step": 265
},
{
"dpo_loss": 0.5915893316268921,
"epoch": 1.5304676428908834,
"grad_norm": 17.579129679111187,
"learning_rate": 4.3763265753642055e-07,
"logits": -1.6173158884048462,
"logps": -90.8720703125,
"loss": 0.2421,
"objective": 0.24301743507385254,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5958333611488342,
"regularize": 0.24301742017269135,
"step": 270
},
{
"dpo_loss": 0.5958731770515442,
"epoch": 1.5588096362777515,
"grad_norm": 16.101798479127662,
"learning_rate": 4.34319334202531e-07,
"logits": -1.6187034845352173,
"logps": -90.12999725341797,
"loss": 0.2448,
"objective": 0.22899790108203888,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6208333373069763,
"regularize": 0.22899790108203888,
"step": 275
},
{
"dpo_loss": 0.5842003226280212,
"epoch": 1.5871516296646198,
"grad_norm": 15.709789047148108,
"learning_rate": 4.309335095262675e-07,
"logits": -1.5244942903518677,
"logps": -88.604248046875,
"loss": 0.2411,
"objective": 0.23850402235984802,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6000000238418579,
"regularize": 0.23850402235984802,
"step": 280
},
{
"dpo_loss": 0.5853084921836853,
"epoch": 1.615493623051488,
"grad_norm": 16.6854633771705,
"learning_rate": 4.274765153095007e-07,
"logits": -1.6502856016159058,
"logps": -89.77727508544922,
"loss": 0.2219,
"objective": 0.21514521539211273,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6041666865348816,
"regularize": 0.21514521539211273,
"step": 285
},
{
"dpo_loss": 0.5910848379135132,
"epoch": 1.643835616438356,
"grad_norm": 18.978761606300836,
"learning_rate": 4.239497113483819e-07,
"logits": -1.7089149951934814,
"logps": -86.87386322021484,
"loss": 0.2312,
"objective": 0.23006680607795715,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6291666626930237,
"regularize": 0.23006680607795715,
"step": 290
},
{
"dpo_loss": 0.5802692174911499,
"epoch": 1.6721776098252243,
"grad_norm": 16.652074965539576,
"learning_rate": 4.203544848984728e-07,
"logits": -1.5955086946487427,
"logps": -86.49956512451172,
"loss": 0.2276,
"objective": 0.23742005228996277,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5791666507720947,
"regularize": 0.23742005228996277,
"step": 295
},
{
"dpo_loss": 0.5916833281517029,
"epoch": 1.7005196032120926,
"grad_norm": 16.884463449554712,
"learning_rate": 4.166922501290729e-07,
"logits": -1.6546835899353027,
"logps": -88.2989730834961,
"loss": 0.2252,
"objective": 0.26394858956336975,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6208333373069763,
"regularize": 0.26394858956336975,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6866207718849182,
"eval_logits": -1.6648496389389038,
"eval_logps": -95.62443542480469,
"eval_loss": 0.410134494304657,
"eval_objective": 0.4137687385082245,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.4137687385082245,
"eval_runtime": 259.4458,
"eval_samples_per_second": 22.317,
"eval_steps_per_second": 0.933,
"step": 300
},
{
"dpo_loss": 0.5952399373054504,
"epoch": 1.7288615965989607,
"grad_norm": 16.24562342201146,
"learning_rate": 4.129644475669616e-07,
"logits": -1.6116312742233276,
"logps": -88.82595825195312,
"loss": 0.218,
"objective": 0.2242499738931656,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2242499738931656,
"step": 305
},
{
"dpo_loss": 0.583368182182312,
"epoch": 1.7572035899858292,
"grad_norm": 18.099666352463437,
"learning_rate": 4.0917254352977206e-07,
"logits": -1.7004183530807495,
"logps": -87.11441040039062,
"loss": 0.2283,
"objective": 0.2325660139322281,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.612500011920929,
"regularize": 0.2325660139322281,
"step": 310
},
{
"dpo_loss": 0.5933206677436829,
"epoch": 1.7855455833726972,
"grad_norm": 16.545516113765466,
"learning_rate": 4.053180295492202e-07,
"logits": -1.602583408355713,
"logps": -88.69900512695312,
"loss": 0.2287,
"objective": 0.21895338594913483,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5833333134651184,
"regularize": 0.21895337104797363,
"step": 315
},
{
"dpo_loss": 0.5876157283782959,
"epoch": 1.8138875767595655,
"grad_norm": 18.487916312721516,
"learning_rate": 4.0140242178441665e-07,
"logits": -1.6777514219284058,
"logps": -90.22407531738281,
"loss": 0.2153,
"objective": 0.20208925008773804,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5874999761581421,
"regularize": 0.20208925008773804,
"step": 320
},
{
"dpo_loss": 0.5746586918830872,
"epoch": 1.8422295701464337,
"grad_norm": 18.670087833334332,
"learning_rate": 3.9742726042549053e-07,
"logits": -1.7464016675949097,
"logps": -91.97502899169922,
"loss": 0.219,
"objective": 0.2114688903093338,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6416666507720947,
"regularize": 0.2114688903093338,
"step": 325
},
{
"dpo_loss": 0.5767069458961487,
"epoch": 1.8705715635333018,
"grad_norm": 18.91235181922618,
"learning_rate": 3.933941090877615e-07,
"logits": -1.466091275215149,
"logps": -90.11954498291016,
"loss": 0.219,
"objective": 0.2171897292137146,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.574999988079071,
"regularize": 0.2171897292137146,
"step": 330
},
{
"dpo_loss": 0.5894278287887573,
"epoch": 1.89891355692017,
"grad_norm": 15.592599296406116,
"learning_rate": 3.8930455419669744e-07,
"logits": -1.6301844120025635,
"logps": -89.44200134277344,
"loss": 0.2112,
"objective": 0.18907961249351501,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6000000238418579,
"regularize": 0.18907961249351501,
"step": 335
},
{
"dpo_loss": 0.5853725075721741,
"epoch": 1.9272555503070383,
"grad_norm": 15.83418724261755,
"learning_rate": 3.851602043638994e-07,
"logits": -1.660121202468872,
"logps": -91.48560333251953,
"loss": 0.2026,
"objective": 0.18658672273159027,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6416666507720947,
"regularize": 0.18658672273159027,
"step": 340
},
{
"dpo_loss": 0.5825453400611877,
"epoch": 1.9555975436939064,
"grad_norm": 16.366640560133238,
"learning_rate": 3.809626897543604e-07,
"logits": -1.657557725906372,
"logps": -90.72650909423828,
"loss": 0.1961,
"objective": 0.18415075540542603,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6166666746139526,
"regularize": 0.18415075540542603,
"step": 345
},
{
"dpo_loss": 0.5838915109634399,
"epoch": 1.9839395370807746,
"grad_norm": 17.651439137685784,
"learning_rate": 3.7671366144524576e-07,
"logits": -1.551125407218933,
"logps": -91.74525451660156,
"loss": 0.2082,
"objective": 0.20508398115634918,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.5541666746139526,
"regularize": 0.20508398115634918,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6863144040107727,
"eval_logits": -1.6988588571548462,
"eval_logps": -97.52546691894531,
"eval_loss": 0.4102429747581482,
"eval_objective": 0.4131539762020111,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.51962810754776,
"eval_regularize": 0.4131539762020111,
"eval_runtime": 258.5165,
"eval_samples_per_second": 22.397,
"eval_steps_per_second": 0.936,
"step": 350
},
{
"dpo_loss": 0.5745717287063599,
"epoch": 2.012281530467643,
"grad_norm": 15.784086525377202,
"learning_rate": 3.724147907764478e-07,
"logits": -1.5323989391326904,
"logps": -90.18486785888672,
"loss": 0.2055,
"objective": 0.20713359117507935,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6458333134651184,
"regularize": 0.20713359117507935,
"step": 355
},
{
"dpo_loss": 0.5675494074821472,
"epoch": 2.040623523854511,
"grad_norm": 20.55210866626824,
"learning_rate": 3.6806776869317067e-07,
"logits": -1.6239458322525024,
"logps": -89.69377899169922,
"loss": 0.1726,
"objective": 0.17787505686283112,
"ranking_idealized": 0.7250000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.675000011920929,
"regularize": 0.17787505686283112,
"step": 360
},
{
"dpo_loss": 0.5650666952133179,
"epoch": 2.0689655172413794,
"grad_norm": 16.544231581396616,
"learning_rate": 3.636743050808028e-07,
"logits": -1.6872822046279907,
"logps": -91.26659393310547,
"loss": 0.1866,
"objective": 0.16895455121994019,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6333333253860474,
"regularize": 0.16895455121994019,
"step": 365
},
{
"dpo_loss": 0.575705349445343,
"epoch": 2.0973075106282475,
"grad_norm": 16.758890304778106,
"learning_rate": 3.5923612809233984e-07,
"logits": -1.662663221359253,
"logps": -87.82825469970703,
"loss": 0.1679,
"objective": 0.1752353459596634,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.574999988079071,
"regularize": 0.1752353310585022,
"step": 370
},
{
"dpo_loss": 0.5633853077888489,
"epoch": 2.1256495040151155,
"grad_norm": 16.99783941953761,
"learning_rate": 3.5475498346862214e-07,
"logits": -1.6271302700042725,
"logps": -91.13916015625,
"loss": 0.1726,
"objective": 0.16911908984184265,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.6791666746139526,
"regularize": 0.16911907494068146,
"step": 375
},
{
"dpo_loss": 0.5752108097076416,
"epoch": 2.153991497401984,
"grad_norm": 17.23340187781712,
"learning_rate": 3.502326338516534e-07,
"logits": -1.5394021272659302,
"logps": -89.99533081054688,
"loss": 0.179,
"objective": 0.1650255024433136,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1650255024433136,
"step": 380
},
{
"dpo_loss": 0.571977972984314,
"epoch": 2.182333490788852,
"grad_norm": 15.78796183229778,
"learning_rate": 3.4567085809127245e-07,
"logits": -1.6716177463531494,
"logps": -91.3305892944336,
"loss": 0.1653,
"objective": 0.13291777670383453,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.612500011920929,
"regularize": 0.13291777670383453,
"step": 385
},
{
"dpo_loss": 0.5752423405647278,
"epoch": 2.21067548417572,
"grad_norm": 18.129151048308177,
"learning_rate": 3.4107145054544855e-07,
"logits": -1.5358682870864868,
"logps": -91.15263366699219,
"loss": 0.1744,
"objective": 0.16379062831401825,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5625,
"regularize": 0.16379062831401825,
"step": 390
},
{
"dpo_loss": 0.5485681891441345,
"epoch": 2.2390174775625886,
"grad_norm": 16.313781937896024,
"learning_rate": 3.3643622037447767e-07,
"logits": -1.5593619346618652,
"logps": -92.42921447753906,
"loss": 0.1776,
"objective": 0.1637614667415619,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6416666507720947,
"regularize": 0.1637614667415619,
"step": 395
},
{
"dpo_loss": 0.5597947239875793,
"epoch": 2.2673594709494567,
"grad_norm": 16.659127876259,
"learning_rate": 3.317669908293554e-07,
"logits": -1.631813645362854,
"logps": -92.92410278320312,
"loss": 0.1825,
"objective": 0.196553573012352,
"ranking_idealized": 0.7583333253860474,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.6958333253860474,
"regularize": 0.1965535581111908,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6862595677375793,
"eval_logits": -1.6931663751602173,
"eval_logps": -97.79962158203125,
"eval_loss": 0.4124037027359009,
"eval_objective": 0.4144473969936371,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5206611752510071,
"eval_regularize": 0.4144473969936371,
"eval_runtime": 258.4529,
"eval_samples_per_second": 22.403,
"eval_steps_per_second": 0.936,
"step": 400
},
{
"dpo_loss": 0.5691500902175903,
"epoch": 2.295701464336325,
"grad_norm": 17.30117286858182,
"learning_rate": 3.270655985346081e-07,
"logits": -1.7139372825622559,
"logps": -89.78938293457031,
"loss": 0.1717,
"objective": 0.18224166333675385,
"ranking_idealized": 0.737500011920929,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6458333134651184,
"regularize": 0.18224166333675385,
"step": 405
},
{
"dpo_loss": 0.5579439997673035,
"epoch": 2.324043457723193,
"grad_norm": 17.123218301010457,
"learning_rate": 3.223338927658632e-07,
"logits": -1.5741162300109863,
"logps": -91.07009887695312,
"loss": 0.1618,
"objective": 0.15759395062923431,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5958333611488342,
"regularize": 0.15759395062923431,
"step": 410
},
{
"dpo_loss": 0.5704253315925598,
"epoch": 2.3523854511100613,
"grad_norm": 17.68931154440285,
"learning_rate": 3.175737347224432e-07,
"logits": -1.6476367712020874,
"logps": -91.30075073242188,
"loss": 0.1732,
"objective": 0.19281157851219177,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.612500011920929,
"regularize": 0.19281157851219177,
"step": 415
},
{
"dpo_loss": 0.5771389603614807,
"epoch": 2.3807274444969297,
"grad_norm": 17.274582557860825,
"learning_rate": 3.1278699679526975e-07,
"logits": -1.5415838956832886,
"logps": -92.63572692871094,
"loss": 0.1579,
"objective": 0.15308959782123566,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.5958333611488342,
"regularize": 0.15308959782123566,
"step": 420
},
{
"dpo_loss": 0.566936731338501,
"epoch": 2.409069437883798,
"grad_norm": 16.445557447346342,
"learning_rate": 3.0797556183036575e-07,
"logits": -1.5967096090316772,
"logps": -91.4622802734375,
"loss": 0.1607,
"objective": 0.16068215668201447,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6000000238418579,
"regularize": 0.16068214178085327,
"step": 425
},
{
"dpo_loss": 0.5632474422454834,
"epoch": 2.4374114312706663,
"grad_norm": 15.962055488306607,
"learning_rate": 3.0314132238824415e-07,
"logits": -1.6247813701629639,
"logps": -92.1604995727539,
"loss": 0.1547,
"objective": 0.1360505074262619,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.625,
"regularize": 0.1360505074262619,
"step": 430
},
{
"dpo_loss": 0.566851019859314,
"epoch": 2.4657534246575343,
"grad_norm": 16.006081940650837,
"learning_rate": 2.982861799994764e-07,
"logits": -1.6544443368911743,
"logps": -92.63692474365234,
"loss": 0.1637,
"objective": 0.17756709456443787,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6208333373069763,
"regularize": 0.17756709456443787,
"step": 435
},
{
"dpo_loss": 0.5565729141235352,
"epoch": 2.4940954180444024,
"grad_norm": 17.37344722468487,
"learning_rate": 2.934120444167326e-07,
"logits": -1.5883994102478027,
"logps": -91.88066101074219,
"loss": 0.159,
"objective": 0.15150482952594757,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6041666865348816,
"regularize": 0.15150482952594757,
"step": 440
},
{
"dpo_loss": 0.565682590007782,
"epoch": 2.5224374114312704,
"grad_norm": 18.453788667979182,
"learning_rate": 2.885208328635864e-07,
"logits": -1.6123565435409546,
"logps": -89.5006332397461,
"loss": 0.1576,
"objective": 0.1587233543395996,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6458333134651184,
"regularize": 0.1587233543395996,
"step": 445
},
{
"dpo_loss": 0.5824019312858582,
"epoch": 2.550779404818139,
"grad_norm": 19.424550718198045,
"learning_rate": 2.83614469280383e-07,
"logits": -1.6537593603134155,
"logps": -91.4095230102539,
"loss": 0.1504,
"objective": 0.15120406448841095,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5791666507720947,
"regularize": 0.15120406448841095,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6864377856254578,
"eval_logits": -1.711348056793213,
"eval_logps": -99.202880859375,
"eval_loss": 0.41492125391960144,
"eval_objective": 0.4176488518714905,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.4176488518714905,
"eval_runtime": 258.9375,
"eval_samples_per_second": 22.361,
"eval_steps_per_second": 0.935,
"step": 450
},
{
"dpo_loss": 0.5587320923805237,
"epoch": 2.579121398205007,
"grad_norm": 18.174711126742732,
"learning_rate": 2.786948835674634e-07,
"logits": -1.6923545598983765,
"logps": -92.0631103515625,
"loss": 0.1514,
"objective": 0.15467478334903717,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6416666507720947,
"regularize": 0.15467478334903717,
"step": 455
},
{
"dpo_loss": 0.5529297590255737,
"epoch": 2.6074633915918755,
"grad_norm": 18.378396938924546,
"learning_rate": 2.737640108260456e-07,
"logits": -1.765284776687622,
"logps": -92.5921401977539,
"loss": 0.1544,
"objective": 0.13981758058071136,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6291666626930237,
"regularize": 0.13981756567955017,
"step": 460
},
{
"dpo_loss": 0.5604754090309143,
"epoch": 2.6358053849787435,
"grad_norm": 17.16312208138119,
"learning_rate": 2.6882379059705953e-07,
"logits": -1.6412590742111206,
"logps": -91.83204650878906,
"loss": 0.1571,
"objective": 0.15992027521133423,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5791666507720947,
"regularize": 0.15992026031017303,
"step": 465
},
{
"dpo_loss": 0.5741956830024719,
"epoch": 2.6641473783656116,
"grad_norm": 17.444271577746782,
"learning_rate": 2.6387616609823504e-07,
"logits": -1.6750518083572388,
"logps": -91.33477020263672,
"loss": 0.151,
"objective": 0.17329135537147522,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.625,
"regularize": 0.17329135537147522,
"step": 470
},
{
"dpo_loss": 0.5746079087257385,
"epoch": 2.69248937175248,
"grad_norm": 17.607595627923466,
"learning_rate": 2.5892308345974514e-07,
"logits": -1.6217347383499146,
"logps": -90.19564819335938,
"loss": 0.1521,
"objective": 0.1534017026424408,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6166666746139526,
"regularize": 0.1534017026424408,
"step": 475
},
{
"dpo_loss": 0.56805020570755,
"epoch": 2.720831365139348,
"grad_norm": 18.441983400540806,
"learning_rate": 2.53966490958702e-07,
"logits": -1.7197903394699097,
"logps": -90.20177459716797,
"loss": 0.148,
"objective": 0.14620445668697357,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6291666626930237,
"regularize": 0.14620442688465118,
"step": 480
},
{
"dpo_loss": 0.5559974312782288,
"epoch": 2.7491733585262166,
"grad_norm": 17.00220355810742,
"learning_rate": 2.4900833825280967e-07,
"logits": -1.628369927406311,
"logps": -93.048828125,
"loss": 0.1488,
"objective": 0.1451708972454071,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6583333611488342,
"regularize": 0.1451708972454071,
"step": 485
},
{
"dpo_loss": 0.555105984210968,
"epoch": 2.7775153519130846,
"grad_norm": 17.798810379621077,
"learning_rate": 2.4405057561347313e-07,
"logits": -1.647185206413269,
"logps": -90.4990463256836,
"loss": 0.1613,
"objective": 0.17200501263141632,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6208333373069763,
"regularize": 0.17200501263141632,
"step": 490
},
{
"dpo_loss": 0.5594576001167297,
"epoch": 2.8058573452999527,
"grad_norm": 18.076540126591944,
"learning_rate": 2.39095153158666e-07,
"logits": -1.6548616886138916,
"logps": -90.19225311279297,
"loss": 0.1504,
"objective": 0.1365150660276413,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6208333373069763,
"regularize": 0.1365150511264801,
"step": 495
},
{
"dpo_loss": 0.5552747845649719,
"epoch": 2.8341993386868207,
"grad_norm": 17.278782223651127,
"learning_rate": 2.3414402008585886e-07,
"logits": -1.6857832670211792,
"logps": -89.0853500366211,
"loss": 0.1494,
"objective": 0.15246258676052094,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6416666507720947,
"regularize": 0.15246258676052094,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6861580014228821,
"eval_logits": -1.7174702882766724,
"eval_logps": -99.17545318603516,
"eval_loss": 0.41525644063949585,
"eval_objective": 0.4182237386703491,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.4182237386703491,
"eval_runtime": 259.2438,
"eval_samples_per_second": 22.334,
"eval_steps_per_second": 0.933,
"step": 500
},
{
"dpo_loss": 0.5660989284515381,
"epoch": 2.862541332073689,
"grad_norm": 18.182680782212074,
"learning_rate": 2.2919912390530943e-07,
"logits": -1.6143929958343506,
"logps": -91.0888900756836,
"loss": 0.1437,
"objective": 0.16082407534122467,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6333333253860474,
"regularize": 0.16082406044006348,
"step": 505
},
{
"dpo_loss": 0.5675150752067566,
"epoch": 2.8908833254605573,
"grad_norm": 16.373132303441977,
"learning_rate": 2.2426240967401638e-07,
"logits": -1.5807684659957886,
"logps": -91.39689636230469,
"loss": 0.1433,
"objective": 0.1494457870721817,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6291666626930237,
"regularize": 0.1494457870721817,
"step": 510
},
{
"dpo_loss": 0.5627566576004028,
"epoch": 2.9192253188474258,
"grad_norm": 18.008132213394468,
"learning_rate": 2.1933581923063837e-07,
"logits": -1.7557440996170044,
"logps": -91.32353210449219,
"loss": 0.1448,
"objective": 0.13260915875434875,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6291666626930237,
"regularize": 0.13260914385318756,
"step": 515
},
{
"dpo_loss": 0.5646940469741821,
"epoch": 2.947567312234294,
"grad_norm": 17.30767973762921,
"learning_rate": 2.1442129043167873e-07,
"logits": -1.610668420791626,
"logps": -92.7865219116211,
"loss": 0.1368,
"objective": 0.11772733181715012,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6291666626930237,
"regularize": 0.11772733181715012,
"step": 520
},
{
"dpo_loss": 0.5658089518547058,
"epoch": 2.975909305621162,
"grad_norm": 18.116492800551395,
"learning_rate": 2.0952075638923652e-07,
"logits": -1.6272333860397339,
"logps": -92.43870544433594,
"loss": 0.1424,
"objective": 0.15236981213092804,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6416666507720947,
"regularize": 0.15236981213092804,
"step": 525
},
{
"dpo_loss": 0.5534684658050537,
"epoch": 3.0042512990080303,
"grad_norm": 18.337044286762765,
"learning_rate": 2.0463614471062435e-07,
"logits": -1.6210473775863647,
"logps": -91.47294616699219,
"loss": 0.1502,
"objective": 0.17477649450302124,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6291666626930237,
"regularize": 0.17477649450302124,
"step": 530
},
{
"dpo_loss": 0.5659457445144653,
"epoch": 3.0325932923948984,
"grad_norm": 16.444884726429134,
"learning_rate": 1.9976937674015026e-07,
"logits": -1.6844907999038696,
"logps": -93.2222671508789,
"loss": 0.1284,
"objective": 0.14268328249454498,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6583333611488342,
"regularize": 0.14268328249454498,
"step": 535
},
{
"dpo_loss": 0.5521051287651062,
"epoch": 3.0609352857817664,
"grad_norm": 19.963683437356444,
"learning_rate": 1.9492236680336483e-07,
"logits": -1.7760847806930542,
"logps": -90.89082336425781,
"loss": 0.1216,
"objective": 0.10329335182905197,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6541666388511658,
"regularize": 0.10329335182905197,
"step": 540
},
{
"dpo_loss": 0.5619763731956482,
"epoch": 3.089277279168635,
"grad_norm": 17.450130382767895,
"learning_rate": 1.9009702145406724e-07,
"logits": -1.6995065212249756,
"logps": -92.40625,
"loss": 0.1232,
"objective": 0.1230437308549881,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1230437308549881,
"step": 545
},
{
"dpo_loss": 0.5528106689453125,
"epoch": 3.117619272555503,
"grad_norm": 18.245098236126562,
"learning_rate": 1.8529523872436977e-07,
"logits": -1.5086556673049927,
"logps": -92.30103302001953,
"loss": 0.1407,
"objective": 0.12957319617271423,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6791666746139526,
"regularize": 0.12957318127155304,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.6856257915496826,
"eval_logits": -1.7183054685592651,
"eval_logps": -99.2997055053711,
"eval_loss": 0.4161340296268463,
"eval_objective": 0.41743505001068115,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.41743505001068115,
"eval_runtime": 258.7783,
"eval_samples_per_second": 22.374,
"eval_steps_per_second": 0.935,
"step": 550
},
{
"dpo_loss": 0.5473430752754211,
"epoch": 3.1459612659423715,
"grad_norm": 18.87722095427309,
"learning_rate": 1.8051890737811393e-07,
"logits": -1.6218358278274536,
"logps": -93.05738067626953,
"loss": 0.1336,
"objective": 0.1305130124092102,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6625000238418579,
"regularize": 0.1305130124092102,
"step": 555
},
{
"dpo_loss": 0.5478367209434509,
"epoch": 3.1743032593292395,
"grad_norm": 22.714698597290123,
"learning_rate": 1.7576990616793137e-07,
"logits": -1.601859211921692,
"logps": -90.21554565429688,
"loss": 0.1212,
"objective": 0.10795855522155762,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.637499988079071,
"regularize": 0.10795855522155762,
"step": 560
},
{
"dpo_loss": 0.5566601157188416,
"epoch": 3.2026452527161076,
"grad_norm": 24.322678833478967,
"learning_rate": 1.710501030962438e-07,
"logits": -1.663177728652954,
"logps": -91.7726058959961,
"loss": 0.1298,
"objective": 0.13216590881347656,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6458333134651184,
"regularize": 0.13216587901115417,
"step": 565
},
{
"dpo_loss": 0.5519458055496216,
"epoch": 3.230987246102976,
"grad_norm": 19.102063233264193,
"learning_rate": 1.663613546804912e-07,
"logits": -1.5763607025146484,
"logps": -91.98208618164062,
"loss": 0.1293,
"objective": 0.13738204538822174,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5916666388511658,
"regularize": 0.13738203048706055,
"step": 570
},
{
"dpo_loss": 0.5559364557266235,
"epoch": 3.259329239489844,
"grad_norm": 16.15481429380041,
"learning_rate": 1.617055052228768e-07,
"logits": -1.6705526113510132,
"logps": -92.17435455322266,
"loss": 0.1266,
"objective": 0.12801046669483185,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6499999761581421,
"regularize": 0.12801046669483185,
"step": 575
},
{
"dpo_loss": 0.5649384260177612,
"epoch": 3.287671232876712,
"grad_norm": 17.44743081337015,
"learning_rate": 1.5708438608491815e-07,
"logits": -1.6591442823410034,
"logps": -93.50952911376953,
"loss": 0.1277,
"objective": 0.11801984906196594,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6291666626930237,
"regularize": 0.11801984906196594,
"step": 580
},
{
"dpo_loss": 0.5595548152923584,
"epoch": 3.3160132262635806,
"grad_norm": 16.72082331684023,
"learning_rate": 1.524998149670871e-07,
"logits": -1.69523286819458,
"logps": -93.74117279052734,
"loss": 0.12,
"objective": 0.10769928246736526,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6583333611488342,
"regularize": 0.10769927501678467,
"step": 585
},
{
"dpo_loss": 0.561581015586853,
"epoch": 3.3443552196504487,
"grad_norm": 19.465809423510365,
"learning_rate": 1.479535951938243e-07,
"logits": -1.7049933671951294,
"logps": -93.83617401123047,
"loss": 0.1205,
"objective": 0.09809862077236176,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.637499988079071,
"regularize": 0.09809862077236176,
"step": 590
},
{
"dpo_loss": 0.5538628101348877,
"epoch": 3.372697213037317,
"grad_norm": 17.81052400873953,
"learning_rate": 1.43447515004208e-07,
"logits": -1.613613247871399,
"logps": -92.85578155517578,
"loss": 0.1191,
"objective": 0.12334737926721573,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.637499988079071,
"regularize": 0.12334737926721573,
"step": 595
},
{
"dpo_loss": 0.5357978940010071,
"epoch": 3.4010392064241852,
"grad_norm": 18.626853535104544,
"learning_rate": 1.3898334684855645e-07,
"logits": -1.624743938446045,
"logps": -92.40316009521484,
"loss": 0.1149,
"objective": 0.13463754951953888,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6458333134651184,
"regularize": 0.13463754951953888,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.6852067112922668,
"eval_logits": -1.71807062625885,
"eval_logps": -99.92455291748047,
"eval_loss": 0.41705650091171265,
"eval_objective": 0.41811424493789673,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.41811424493789673,
"eval_runtime": 259.0859,
"eval_samples_per_second": 22.348,
"eval_steps_per_second": 0.934,
"step": 600
},
{
"dpo_loss": 0.5652448534965515,
"epoch": 3.4293811998110533,
"grad_norm": 17.958926430591173,
"learning_rate": 1.3456284669124157e-07,
"logits": -1.6740020513534546,
"logps": -94.55862426757812,
"loss": 0.1179,
"objective": 0.11572790890932083,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.625,
"regularize": 0.11572788655757904,
"step": 605
},
{
"dpo_loss": 0.5543821454048157,
"epoch": 3.4577231931979218,
"grad_norm": 17.326848783729876,
"learning_rate": 1.301877533199859e-07,
"logits": -1.6315828561782837,
"logps": -92.49845886230469,
"loss": 0.1149,
"objective": 0.1067105308175087,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6499999761581421,
"regularize": 0.1067105159163475,
"step": 610
},
{
"dpo_loss": 0.5443283915519714,
"epoch": 3.48606518658479,
"grad_norm": 16.84586393500809,
"learning_rate": 1.2585978766191724e-07,
"logits": -1.664933681488037,
"logps": -93.27455139160156,
"loss": 0.1142,
"objective": 0.10945113748311996,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.5958333611488342,
"regularize": 0.10945113748311996,
"step": 615
},
{
"dpo_loss": 0.5419160723686218,
"epoch": 3.514407179971658,
"grad_norm": 17.32874521556865,
"learning_rate": 1.2158065210664848e-07,
"logits": -1.5332224369049072,
"logps": -92.34308624267578,
"loss": 0.1203,
"objective": 0.12084861099720001,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6166666746139526,
"regularize": 0.12084860354661942,
"step": 620
},
{
"dpo_loss": 0.5541211366653442,
"epoch": 3.5427491733585263,
"grad_norm": 17.48530471086995,
"learning_rate": 1.1735202983664802e-07,
"logits": -1.6171096563339233,
"logps": -91.3125991821289,
"loss": 0.1178,
"objective": 0.11125477403402328,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6416666507720947,
"regularize": 0.11125477403402328,
"step": 625
},
{
"dpo_loss": 0.5698776245117188,
"epoch": 3.5710911667453944,
"grad_norm": 17.864701578880954,
"learning_rate": 1.1317558416516696e-07,
"logits": -1.697689175605774,
"logps": -91.67240905761719,
"loss": 0.1261,
"objective": 0.13253255188465118,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6083333492279053,
"regularize": 0.13253255188465118,
"step": 630
},
{
"dpo_loss": 0.5599467158317566,
"epoch": 3.5994331601322624,
"grad_norm": 17.33519253157568,
"learning_rate": 1.090529578819799e-07,
"logits": -1.6461411714553833,
"logps": -91.57376098632812,
"loss": 0.1157,
"objective": 0.10732007026672363,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6208333373069763,
"regularize": 0.10732006281614304,
"step": 635
},
{
"dpo_loss": 0.540539562702179,
"epoch": 3.627775153519131,
"grad_norm": 17.025667462047203,
"learning_rate": 1.0498577260720048e-07,
"logits": -1.5717778205871582,
"logps": -93.14022827148438,
"loss": 0.1146,
"objective": 0.13500064611434937,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.574999988079071,
"regularize": 0.13500064611434937,
"step": 640
},
{
"dpo_loss": 0.5469278693199158,
"epoch": 3.656117146905999,
"grad_norm": 17.536092815770388,
"learning_rate": 1.0097562815342214e-07,
"logits": -1.6058826446533203,
"logps": -90.76680755615234,
"loss": 0.1144,
"objective": 0.1191474050283432,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6291666626930237,
"regularize": 0.1191474050283432,
"step": 645
},
{
"dpo_loss": 0.5611483454704285,
"epoch": 3.6844591402928675,
"grad_norm": 17.646206320924833,
"learning_rate": 9.702410189643836e-08,
"logits": -1.6121342182159424,
"logps": -92.83375549316406,
"loss": 0.1108,
"objective": 0.09943919628858566,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6083333492279053,
"regularize": 0.09943918883800507,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.6852837800979614,
"eval_logits": -1.7315040826797485,
"eval_logps": -99.91177368164062,
"eval_loss": 0.41784536838531494,
"eval_objective": 0.41884875297546387,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.41884875297546387,
"eval_runtime": 259.4431,
"eval_samples_per_second": 22.317,
"eval_steps_per_second": 0.933,
"step": 650
},
{
"dpo_loss": 0.5510907769203186,
"epoch": 3.7128011336797355,
"grad_norm": 17.84028807284025,
"learning_rate": 9.313274815478698e-08,
"logits": -1.6280105113983154,
"logps": -92.27388763427734,
"loss": 0.117,
"objective": 0.10249165445566177,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10249165445566177,
"step": 655
},
{
"dpo_loss": 0.5551621913909912,
"epoch": 3.7411431270666036,
"grad_norm": 17.863904309670215,
"learning_rate": 8.930309757836516e-08,
"logits": -1.7605994939804077,
"logps": -92.74076080322266,
"loss": 0.1162,
"objective": 0.11682406812906265,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6625000238418579,
"regularize": 0.11682406812906265,
"step": 660
},
{
"dpo_loss": 0.5553780198097229,
"epoch": 3.769485120453472,
"grad_norm": 17.287090327509993,
"learning_rate": 8.553665654635342e-08,
"logits": -1.6500779390335083,
"logps": -92.00687408447266,
"loss": 0.116,
"objective": 0.11367592960596085,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6541666388511658,
"regularize": 0.11367591470479965,
"step": 665
},
{
"dpo_loss": 0.5561904311180115,
"epoch": 3.79782711384034,
"grad_norm": 18.08647866984471,
"learning_rate": 8.183490657468686e-08,
"logits": -1.7430044412612915,
"logps": -92.36637878417969,
"loss": 0.1153,
"objective": 0.13086958229541779,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.7124999761581421,
"regularize": 0.13086958229541779,
"step": 670
},
{
"dpo_loss": 0.5525475740432739,
"epoch": 3.826169107227208,
"grad_norm": 17.678219555630616,
"learning_rate": 7.819930373330669e-08,
"logits": -1.6892848014831543,
"logps": -91.58055114746094,
"loss": 0.1165,
"objective": 0.10088498890399933,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6416666507720947,
"regularize": 0.10088498145341873,
"step": 675
},
{
"dpo_loss": 0.5708147883415222,
"epoch": 3.8545111006140766,
"grad_norm": 17.208804705028182,
"learning_rate": 7.463127807341966e-08,
"logits": -1.6462949514389038,
"logps": -92.41487884521484,
"loss": 0.1181,
"objective": 0.11868777871131897,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6208333373069763,
"regularize": 0.11868777871131897,
"step": 680
},
{
"dpo_loss": 0.5442604422569275,
"epoch": 3.8828530940009447,
"grad_norm": 18.600166194890996,
"learning_rate": 7.113223306499336e-08,
"logits": -1.7259678840637207,
"logps": -91.63528442382812,
"loss": 0.1127,
"objective": 0.10472600162029266,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6499999761581421,
"regularize": 0.10472600162029266,
"step": 685
},
{
"dpo_loss": 0.5496628284454346,
"epoch": 3.9111950873878127,
"grad_norm": 17.80496655704031,
"learning_rate": 6.770354504470574e-08,
"logits": -1.6540542840957642,
"logps": -90.78262329101562,
"loss": 0.1164,
"objective": 0.10735266655683517,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6875,
"regularize": 0.10735265165567398,
"step": 690
},
{
"dpo_loss": 0.5639461874961853,
"epoch": 3.9395370807746812,
"grad_norm": 16.492816826616206,
"learning_rate": 6.434656267456842e-08,
"logits": -1.6047898530960083,
"logps": -92.38011932373047,
"loss": 0.1193,
"objective": 0.12910698354244232,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.5833333134651184,
"regularize": 0.12910698354244232,
"step": 695
},
{
"dpo_loss": 0.5509793162345886,
"epoch": 3.9678790741615493,
"grad_norm": 19.17114576937693,
"learning_rate": 6.106260641143546e-08,
"logits": -1.6564711332321167,
"logps": -92.65071868896484,
"loss": 0.1146,
"objective": 0.1030283123254776,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.5958333611488342,
"regularize": 0.103028304874897,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.6854080557823181,
"eval_logits": -1.7319272756576538,
"eval_logps": -99.89824676513672,
"eval_loss": 0.4175797998905182,
"eval_objective": 0.4186650514602661,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.4186650514602661,
"eval_runtime": 258.7065,
"eval_samples_per_second": 22.381,
"eval_steps_per_second": 0.935,
"step": 700
},
{
"dpo_loss": 0.5571620464324951,
"epoch": 3.9962210675484178,
"grad_norm": 18.74884098276965,
"learning_rate": 5.7852967987606e-08,
"logits": -1.554320216178894,
"logps": -90.9109878540039,
"loss": 0.1135,
"objective": 0.12702669203281403,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6583333611488342,
"regularize": 0.12702666223049164,
"step": 705
},
{
"dpo_loss": 0.5528541803359985,
"epoch": 4.024563060935286,
"grad_norm": 17.40621172754528,
"learning_rate": 5.471890990272665e-08,
"logits": -1.6468113660812378,
"logps": -92.61128997802734,
"loss": 0.1135,
"objective": 0.1373264044523239,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.612500011920929,
"regularize": 0.13732638955116272,
"step": 710
},
{
"dpo_loss": 0.5556226968765259,
"epoch": 4.052905054322154,
"grad_norm": 18.812501686123863,
"learning_rate": 5.166166492719124e-08,
"logits": -1.6049120426177979,
"logps": -92.74799346923828,
"loss": 0.1035,
"objective": 0.10493499785661697,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6458333134651184,
"regularize": 0.10493497550487518,
"step": 715
},
{
"dpo_loss": 0.5424375534057617,
"epoch": 4.081247047709022,
"grad_norm": 17.435339866299028,
"learning_rate": 4.868243561723534e-08,
"logits": -1.511703372001648,
"logps": -94.71248626708984,
"loss": 0.0978,
"objective": 0.09467672556638718,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6166666746139526,
"regularize": 0.09467671811580658,
"step": 720
},
{
"dpo_loss": 0.5652304291725159,
"epoch": 4.109589041095891,
"grad_norm": 17.507850551258127,
"learning_rate": 4.578239384191529e-08,
"logits": -1.6384118795394897,
"logps": -92.1180191040039,
"loss": 0.1076,
"objective": 0.09941933304071426,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.637499988079071,
"regularize": 0.09941932559013367,
"step": 725
},
{
"dpo_loss": 0.5497789978981018,
"epoch": 4.137931034482759,
"grad_norm": 18.803004868454263,
"learning_rate": 4.296268032215733e-08,
"logits": -1.7138111591339111,
"logps": -91.83662414550781,
"loss": 0.1086,
"objective": 0.10822432488203049,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6208333373069763,
"regularize": 0.10822432488203049,
"step": 730
},
{
"dpo_loss": 0.55525803565979,
"epoch": 4.166273027869627,
"grad_norm": 18.177348382836357,
"learning_rate": 4.022440418205944e-08,
"logits": -1.6232236623764038,
"logps": -93.14463806152344,
"loss": 0.1028,
"objective": 0.10451411455869675,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6541666388511658,
"regularize": 0.10451411455869675,
"step": 735
},
{
"dpo_loss": 0.535234808921814,
"epoch": 4.194615021256495,
"grad_norm": 17.133543410858152,
"learning_rate": 3.756864251262143e-08,
"logits": -1.610323190689087,
"logps": -93.36137390136719,
"loss": 0.1062,
"objective": 0.10550294071435928,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6541666388511658,
"regularize": 0.10550292581319809,
"step": 740
},
{
"dpo_loss": 0.546442449092865,
"epoch": 4.222957014643363,
"grad_norm": 18.78547392108143,
"learning_rate": 3.4996439948074855e-08,
"logits": -1.6879092454910278,
"logps": -90.12301635742188,
"loss": 0.1001,
"objective": 0.10297367721796036,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6208333373069763,
"regularize": 0.10297366231679916,
"step": 745
},
{
"dpo_loss": 0.5416663289070129,
"epoch": 4.251299008030231,
"grad_norm": 19.462231175744662,
"learning_rate": 3.250880825498026e-08,
"logits": -1.8104737997055054,
"logps": -92.32807922363281,
"loss": 0.0986,
"objective": 0.10099396854639053,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10099395364522934,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.6853212714195251,
"eval_logits": -1.7322306632995605,
"eval_logps": -99.86943054199219,
"eval_loss": 0.41747406125068665,
"eval_objective": 0.41828420758247375,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.41828420758247375,
"eval_runtime": 258.964,
"eval_samples_per_second": 22.358,
"eval_steps_per_second": 0.934,
"step": 750
},
{
"dpo_loss": 0.5468146800994873,
"epoch": 4.2796410014171,
"grad_norm": 18.270752967286892,
"learning_rate": 3.010672593425209e-08,
"logits": -1.7138711214065552,
"logps": -92.11996459960938,
"loss": 0.1147,
"objective": 0.10049024224281311,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6458333134651184,
"regularize": 0.10049023479223251,
"step": 755
},
{
"dpo_loss": 0.5455428957939148,
"epoch": 4.307982994803968,
"grad_norm": 17.285027330879423,
"learning_rate": 2.7791137836269158e-08,
"logits": -1.6757961511611938,
"logps": -93.05391693115234,
"loss": 0.0961,
"objective": 0.10086000710725784,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6833333373069763,
"regularize": 0.10086000710725784,
"step": 760
},
{
"dpo_loss": 0.5395826101303101,
"epoch": 4.336324988190836,
"grad_norm": 17.680760382133624,
"learning_rate": 2.556295478922116e-08,
"logits": -1.7200431823730469,
"logps": -93.1734619140625,
"loss": 0.1053,
"objective": 0.12091321498155594,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6833333373069763,
"regularize": 0.12091320008039474,
"step": 765
},
{
"dpo_loss": 0.5380468964576721,
"epoch": 4.364666981577704,
"grad_norm": 17.361908211383366,
"learning_rate": 2.3423053240837514e-08,
"logits": -1.577264428138733,
"logps": -91.18030548095703,
"loss": 0.1064,
"objective": 0.11028440296649933,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.637499988079071,
"regularize": 0.11028438061475754,
"step": 770
},
{
"dpo_loss": 0.5494747161865234,
"epoch": 4.393008974964572,
"grad_norm": 16.640977636984772,
"learning_rate": 2.137227491364016e-08,
"logits": -1.627792239189148,
"logps": -91.97000885009766,
"loss": 0.1067,
"objective": 0.10595239698886871,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6333333253860474,
"regularize": 0.10595235228538513,
"step": 775
},
{
"dpo_loss": 0.5575358867645264,
"epoch": 4.42135096835144,
"grad_norm": 17.112345268128863,
"learning_rate": 1.9411426473854687e-08,
"logits": -1.693690538406372,
"logps": -90.5418472290039,
"loss": 0.1004,
"objective": 0.10788667947053909,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6208333373069763,
"regularize": 0.10788667947053909,
"step": 780
},
{
"dpo_loss": 0.5676646828651428,
"epoch": 4.449692961738309,
"grad_norm": 16.657419373072543,
"learning_rate": 1.7541279214111275e-08,
"logits": -1.7215303182601929,
"logps": -90.63499450683594,
"loss": 0.1128,
"objective": 0.1177934780716896,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.612500011920929,
"regularize": 0.1177934780716896,
"step": 785
},
{
"dpo_loss": 0.539345920085907,
"epoch": 4.478034955125177,
"grad_norm": 17.089680176209615,
"learning_rate": 1.57625687500596e-08,
"logits": -1.6345340013504028,
"logps": -93.4063720703125,
"loss": 0.0998,
"objective": 0.10817180573940277,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6916666626930237,
"regularize": 0.10817176848649979,
"step": 790
},
{
"dpo_loss": 0.547528088092804,
"epoch": 4.506376948512045,
"grad_norm": 17.320194779122446,
"learning_rate": 1.4075994731016894e-08,
"logits": -1.5627334117889404,
"logps": -93.30286407470703,
"loss": 0.1058,
"objective": 0.102629154920578,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6291666626930237,
"regularize": 0.10262913256883621,
"step": 795
},
{
"dpo_loss": 0.5486911535263062,
"epoch": 4.534718941898913,
"grad_norm": 17.9229448629881,
"learning_rate": 1.2482220564763667e-08,
"logits": -1.5870776176452637,
"logps": -92.80538940429688,
"loss": 0.1042,
"objective": 0.11460768431425095,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6499999761581421,
"regularize": 0.11460768431425095,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.6852768659591675,
"eval_logits": -1.7317209243774414,
"eval_logps": -99.85995483398438,
"eval_loss": 0.4175398349761963,
"eval_objective": 0.4183157980442047,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.4183157980442047,
"eval_runtime": 259.0856,
"eval_samples_per_second": 22.348,
"eval_steps_per_second": 0.934,
"step": 800
},
{
"dpo_loss": 0.5488670468330383,
"epoch": 4.563060935285781,
"grad_norm": 18.799592478883184,
"learning_rate": 1.0981873156594379e-08,
"logits": -1.627816081047058,
"logps": -91.32179260253906,
"loss": 0.1001,
"objective": 0.10988225042819977,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.6041666865348816,
"regularize": 0.10988224297761917,
"step": 805
},
{
"dpo_loss": 0.5488799810409546,
"epoch": 4.59140292867265,
"grad_norm": 16.566296936758206,
"learning_rate": 9.575542662726754e-09,
"logits": -1.7243562936782837,
"logps": -91.10765075683594,
"loss": 0.0996,
"objective": 0.08869278430938721,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.5833333134651184,
"regularize": 0.08869277685880661,
"step": 810
},
{
"dpo_loss": 0.5502530336380005,
"epoch": 4.619744922059518,
"grad_norm": 16.948787644578637,
"learning_rate": 8.263782258165819e-09,
"logits": -1.5700196027755737,
"logps": -92.37843322753906,
"loss": 0.0991,
"objective": 0.07977009564638138,
"ranking_idealized": 0.7250000238418579,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.6958333253860474,
"regularize": 0.07977008074522018,
"step": 815
},
{
"dpo_loss": 0.5550402402877808,
"epoch": 4.648086915446386,
"grad_norm": 21.501267763535818,
"learning_rate": 7.047107919114586e-09,
"logits": -1.6636712551116943,
"logps": -92.61454010009766,
"loss": 0.0982,
"objective": 0.08367303013801575,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6708333492279053,
"regularize": 0.08367302268743515,
"step": 820
},
{
"dpo_loss": 0.5588962435722351,
"epoch": 4.6764289088332545,
"grad_norm": 16.315710057694485,
"learning_rate": 5.925998220016659e-09,
"logits": -1.5499807596206665,
"logps": -90.22130584716797,
"loss": 0.1018,
"objective": 0.09301813691854477,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.5833333134651184,
"regularize": 0.09301812201738358,
"step": 825
},
{
"dpo_loss": 0.5498708486557007,
"epoch": 4.7047709022201225,
"grad_norm": 17.748669049129045,
"learning_rate": 4.9008941453107525e-09,
"logits": -1.7388263940811157,
"logps": -92.17695617675781,
"loss": 0.1092,
"objective": 0.11468993872404099,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6416666507720947,
"regularize": 0.1146899089217186,
"step": 830
},
{
"dpo_loss": 0.5327169299125671,
"epoch": 4.733112895606991,
"grad_norm": 17.389468155390862,
"learning_rate": 3.9721989159709754e-09,
"logits": -1.6580873727798462,
"logps": -92.22929382324219,
"loss": 0.1,
"objective": 0.09779965132474899,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6041666865348816,
"regularize": 0.0977996364235878,
"step": 835
},
{
"dpo_loss": 0.5324665904045105,
"epoch": 4.7614548889938595,
"grad_norm": 18.51697686947663,
"learning_rate": 3.140277830901428e-09,
"logits": -1.6570351123809814,
"logps": -92.75865173339844,
"loss": 0.1079,
"objective": 0.11856434494256973,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6499999761581421,
"regularize": 0.11856433004140854,
"step": 840
},
{
"dpo_loss": 0.560818612575531,
"epoch": 4.7897968823807275,
"grad_norm": 17.858581136510683,
"learning_rate": 2.4054581232470785e-09,
"logits": -1.6901015043258667,
"logps": -92.13179016113281,
"loss": 0.1067,
"objective": 0.10368030518293381,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.6208333373069763,
"regularize": 0.10368029028177261,
"step": 845
},
{
"dpo_loss": 0.5496495366096497,
"epoch": 4.818138875767596,
"grad_norm": 17.640915580271592,
"learning_rate": 1.7680288316779256e-09,
"logits": -1.6190950870513916,
"logps": -90.9464340209961,
"loss": 0.103,
"objective": 0.10453298687934875,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6333333253860474,
"regularize": 0.10453297942876816,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.6852567791938782,
"eval_logits": -1.732380986213684,
"eval_logps": -99.89720153808594,
"eval_loss": 0.4175875782966614,
"eval_objective": 0.4183763563632965,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5237603187561035,
"eval_regularize": 0.4183763563632965,
"eval_runtime": 259.1903,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 0.934,
"step": 850
},
{
"dpo_loss": 0.5584205389022827,
"epoch": 4.846480869154464,
"grad_norm": 17.56984774908714,
"learning_rate": 1.2282406866966078e-09,
"logits": -1.6185228824615479,
"logps": -91.83565521240234,
"loss": 0.0974,
"objective": 0.09289266169071198,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.675000011920929,
"regularize": 0.09289265424013138,
"step": 855
},
{
"dpo_loss": 0.5458131432533264,
"epoch": 4.874822862541333,
"grad_norm": 17.337457908328606,
"learning_rate": 7.863060120144316e-10,
"logits": -1.5824497938156128,
"logps": -91.32083892822266,
"loss": 0.0959,
"objective": 0.11178465932607651,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6791666746139526,
"regularize": 0.11178465187549591,
"step": 860
},
{
"dpo_loss": 0.555813729763031,
"epoch": 4.903164855928201,
"grad_norm": 16.568497702615847,
"learning_rate": 4.4239864103465254e-10,
"logits": -1.6553268432617188,
"logps": -90.40623474121094,
"loss": 0.1003,
"objective": 0.1271737664937973,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1271737515926361,
"step": 865
},
{
"dpo_loss": 0.5469813942909241,
"epoch": 4.931506849315069,
"grad_norm": 17.67813635168232,
"learning_rate": 1.966538484758362e-10,
"logits": -1.7142003774642944,
"logps": -92.42487335205078,
"loss": 0.1053,
"objective": 0.10629518330097198,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6625000238418579,
"regularize": 0.1062951609492302,
"step": 870
},
{
"dpo_loss": 0.5456808805465698,
"epoch": 4.959848842701937,
"grad_norm": 17.372715214830695,
"learning_rate": 4.9168297161839014e-11,
"logits": -1.6318602561950684,
"logps": -92.3662109375,
"loss": 0.1017,
"objective": 0.08666170388460159,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6541666388511658,
"regularize": 0.086661696434021,
"step": 875
},
{
"dpo_loss": 0.5451498627662659,
"epoch": 4.988190836088805,
"grad_norm": 17.608890670600516,
"learning_rate": 0.0,
"logits": -1.7033004760742188,
"logps": -92.65689849853516,
"loss": 0.0938,
"objective": 0.09012699872255325,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6541666388511658,
"regularize": 0.09012699127197266,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.19899855256080629,
"train_runtime": 35117.4941,
"train_samples_per_second": 7.233,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}