{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.988190836088805, "eval_steps": 50, "global_step": 880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.005668398677373642, "grad_norm": 13.413600039235007, "learning_rate": 5.681818181818181e-09, "logits": -1.3147305250167847, "logps": -88.0877456665039, "loss": 0.4113, "objective": 0.41588976979255676, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.41588976979255676, "step": 1 }, { "dpo_loss": 0.6931512951850891, "epoch": 0.02834199338686821, "grad_norm": 13.318320815280419, "learning_rate": 2.8409090909090908e-08, "logits": -1.3678321838378906, "logps": -84.44427490234375, "loss": 0.4131, "objective": 0.3755509555339813, "ranking_idealized": 0.6510416865348816, "ranking_idealized_expo": 0.5572916865348816, "ranking_simple": 0.546875, "regularize": 0.3755509555339813, "step": 5 }, { "dpo_loss": 0.6927531361579895, "epoch": 0.05668398677373642, "grad_norm": 13.050623089340824, "learning_rate": 5.6818181818181815e-08, "logits": -1.4463988542556763, "logps": -83.39988708496094, "loss": 0.4176, "objective": 0.4423220753669739, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.4423220753669739, "step": 10 }, { "dpo_loss": 0.6927918195724487, "epoch": 0.08502598016060463, "grad_norm": 12.549385306441062, "learning_rate": 8.522727272727271e-08, "logits": -1.4107797145843506, "logps": -83.50421905517578, "loss": 0.4254, "objective": 0.41179904341697693, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.41179904341697693, "step": 15 }, { "dpo_loss": 0.6924694776535034, "epoch": 0.11336797354747284, "grad_norm": 13.269620119946596, "learning_rate": 1.1363636363636363e-07, "logits": -1.4003115892410278, "logps": -84.06736755371094, "loss": 0.4149, "objective": 0.40317121148109436, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.40317121148109436, "step": 20 }, { "dpo_loss": 0.6906281113624573, "epoch": 0.14170996693434104, "grad_norm": 12.65234373247132, "learning_rate": 1.4204545454545455e-07, "logits": -1.4490704536437988, "logps": -83.72380065917969, "loss": 0.412, "objective": 0.4304184317588806, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.4304184317588806, "step": 25 }, { "dpo_loss": 0.6906370520591736, "epoch": 0.17005196032120926, "grad_norm": 13.419812147505471, "learning_rate": 1.7045454545454543e-07, "logits": -1.4248003959655762, "logps": -84.09757232666016, "loss": 0.4126, "objective": 0.41593801975250244, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5333333611488342, "regularize": 0.41593801975250244, "step": 30 }, { "dpo_loss": 0.6881809234619141, "epoch": 0.19839395370807747, "grad_norm": 13.431894879328123, "learning_rate": 1.9886363636363636e-07, "logits": -1.398374319076538, "logps": -82.60546112060547, "loss": 0.4095, "objective": 0.3929609954357147, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.3929609954357147, "step": 35 }, { "dpo_loss": 0.6868489384651184, "epoch": 0.22673594709494568, "grad_norm": 15.0250838416837, "learning_rate": 2.2727272727272726e-07, "logits": -1.3904410600662231, "logps": -82.84651947021484, "loss": 0.42, "objective": 0.43919187784194946, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5166666507720947, "regularize": 0.43919187784194946, "step": 40 }, { "dpo_loss": 0.6844364404678345, "epoch": 0.25507794048181387, "grad_norm": 13.128806663839857, "learning_rate": 2.5568181818181816e-07, "logits": -1.5230154991149902, "logps": -84.21646118164062, "loss": 0.4194, "objective": 0.4717731177806854, "ranking_idealized": 0.7208333611488342, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5458333492279053, "regularize": 0.4717731177806854, "step": 45 }, { "dpo_loss": 0.6831071376800537, "epoch": 0.2834199338686821, "grad_norm": 12.39410793472882, "learning_rate": 2.840909090909091e-07, "logits": -1.431780219078064, "logps": -82.2941665649414, "loss": 0.4122, "objective": 0.3948862850666046, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.44999998807907104, "regularize": 0.3948862850666046, "step": 50 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6914567947387695, "eval_logits": -1.4614633321762085, "eval_logps": -90.56139373779297, "eval_loss": 0.4102250635623932, "eval_objective": 0.40930914878845215, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5123966932296753, "eval_regularize": 0.40930914878845215, "eval_runtime": 260.1383, "eval_samples_per_second": 22.257, "eval_steps_per_second": 0.93, "step": 50 }, { "dpo_loss": 0.6807647943496704, "epoch": 0.3117619272555503, "grad_norm": 13.979478083508853, "learning_rate": 3.1249999999999997e-07, "logits": -1.4781759977340698, "logps": -84.0101089477539, "loss": 0.4052, "objective": 0.4063163101673126, "ranking_idealized": 0.7124999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.4063163101673126, "step": 55 }, { "dpo_loss": 0.6824926137924194, "epoch": 0.3401039206424185, "grad_norm": 13.484676530515722, "learning_rate": 3.4090909090909085e-07, "logits": -1.4679373502731323, "logps": -83.09486389160156, "loss": 0.3992, "objective": 0.387731671333313, "ranking_idealized": 0.7041666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.387731671333313, "step": 60 }, { "dpo_loss": 0.6788213849067688, "epoch": 0.3684459140292867, "grad_norm": 13.535493104004898, "learning_rate": 3.693181818181818e-07, "logits": -1.4250341653823853, "logps": -83.52283477783203, "loss": 0.3842, "objective": 0.3719988465309143, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 0.3719988465309143, "step": 65 }, { "dpo_loss": 0.6763210296630859, "epoch": 0.39678790741615494, "grad_norm": 13.25897931133664, "learning_rate": 3.977272727272727e-07, "logits": -1.5077797174453735, "logps": -85.39080047607422, "loss": 0.3855, "objective": 0.42043933272361755, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5291666388511658, "regularize": 0.42043933272361755, "step": 70 }, { "dpo_loss": 0.678033709526062, "epoch": 0.42512990080302315, "grad_norm": 14.035157652400327, "learning_rate": 4.2613636363636364e-07, "logits": -1.5349814891815186, "logps": -86.0143051147461, "loss": 0.3945, "objective": 0.41438591480255127, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5666666626930237, "regularize": 0.41438591480255127, "step": 75 }, { "dpo_loss": 0.6748775243759155, "epoch": 0.45347189418989137, "grad_norm": 13.539091864104346, "learning_rate": 4.545454545454545e-07, "logits": -1.5832253694534302, "logps": -85.59701538085938, "loss": 0.3789, "objective": 0.37422579526901245, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.37422579526901245, "step": 80 }, { "dpo_loss": 0.6750870943069458, "epoch": 0.4818138875767596, "grad_norm": 12.78905385712093, "learning_rate": 4.829545454545455e-07, "logits": -1.5551499128341675, "logps": -84.24475860595703, "loss": 0.374, "objective": 0.39821094274520874, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.39821094274520874, "step": 85 }, { "dpo_loss": 0.6703960299491882, "epoch": 0.5101558809636277, "grad_norm": 14.26040681218726, "learning_rate": 4.999921328558332e-07, "logits": -1.37662935256958, "logps": -86.21568298339844, "loss": 0.3761, "objective": 0.3837045729160309, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.4833333194255829, "regularize": 0.3837045729160309, "step": 90 }, { "dpo_loss": 0.658724844455719, "epoch": 0.538497874350496, "grad_norm": 14.003866252787525, "learning_rate": 4.999036331701828e-07, "logits": -1.4695987701416016, "logps": -85.49458312988281, "loss": 0.3642, "objective": 0.39033612608909607, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5666666626930237, "regularize": 0.39033612608909607, "step": 95 }, { "dpo_loss": 0.6546652317047119, "epoch": 0.5668398677373642, "grad_norm": 13.876424116810778, "learning_rate": 4.99716834795752e-07, "logits": -1.5616024732589722, "logps": -86.23612213134766, "loss": 0.374, "objective": 0.4149954915046692, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.4149954915046692, "step": 100 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6882808208465576, "eval_logits": -1.5521211624145508, "eval_logps": -92.03614807128906, "eval_loss": 0.4072900712490082, "eval_objective": 0.40819329023361206, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5144628286361694, "eval_regularize": 0.40819329023361206, "eval_runtime": 258.9254, "eval_samples_per_second": 22.362, "eval_steps_per_second": 0.935, "step": 100 }, { "dpo_loss": 0.6509627103805542, "epoch": 0.5951818611242324, "grad_norm": 15.031011715031442, "learning_rate": 4.994318112090048e-07, "logits": -1.4410721063613892, "logps": -85.85182189941406, "loss": 0.3711, "objective": 0.3764886260032654, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.574999988079071, "regularize": 0.3764886260032654, "step": 105 }, { "dpo_loss": 0.6576470136642456, "epoch": 0.6235238545111006, "grad_norm": 16.068373019347053, "learning_rate": 4.990486745229364e-07, "logits": -1.6439845561981201, "logps": -84.1036376953125, "loss": 0.3694, "objective": 0.39763620495796204, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.48750001192092896, "regularize": 0.39763620495796204, "step": 110 }, { "dpo_loss": 0.6543448567390442, "epoch": 0.6518658478979689, "grad_norm": 15.729444594038945, "learning_rate": 4.985675754429743e-07, "logits": -1.6000815629959106, "logps": -83.94436645507812, "loss": 0.3477, "objective": 0.3455929458141327, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.3455929458141327, "step": 115 }, { "dpo_loss": 0.6514815092086792, "epoch": 0.680207841284837, "grad_norm": 14.396427732147952, "learning_rate": 4.979887032076988e-07, "logits": -1.5459378957748413, "logps": -85.23513793945312, "loss": 0.3534, "objective": 0.3168259561061859, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5458333492279053, "regularize": 0.3168259561061859, "step": 120 }, { "dpo_loss": 0.6492612957954407, "epoch": 0.7085498346717053, "grad_norm": 16.706445645247783, "learning_rate": 4.973122855144065e-07, "logits": -1.5174397230148315, "logps": -86.0051040649414, "loss": 0.3448, "objective": 0.3529473543167114, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5708333253860474, "regularize": 0.3529473543167114, "step": 125 }, { "dpo_loss": 0.65309077501297, "epoch": 0.7368918280585735, "grad_norm": 15.417556754357976, "learning_rate": 4.965385884295466e-07, "logits": -1.664696455001831, "logps": -85.23889923095703, "loss": 0.3464, "objective": 0.33712950348854065, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.4958333373069763, "regularize": 0.33712950348854065, "step": 130 }, { "dpo_loss": 0.6549941301345825, "epoch": 0.7652338214454416, "grad_norm": 13.59480500578719, "learning_rate": 4.956679162840645e-07, "logits": -1.626897931098938, "logps": -86.90068817138672, "loss": 0.3309, "objective": 0.34302666783332825, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5458333492279053, "regularize": 0.34302666783332825, "step": 135 }, { "dpo_loss": 0.6500818729400635, "epoch": 0.7935758148323099, "grad_norm": 14.79485288903614, "learning_rate": 4.947006115536947e-07, "logits": -1.523794412612915, "logps": -86.5340576171875, "loss": 0.3244, "objective": 0.3356337249279022, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5166666507720947, "regularize": 0.3356337249279022, "step": 140 }, { "dpo_loss": 0.6436840295791626, "epoch": 0.821917808219178, "grad_norm": 14.29078834943314, "learning_rate": 4.936370547242482e-07, "logits": -1.5991618633270264, "logps": -86.87805938720703, "loss": 0.3315, "objective": 0.35039833188056946, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.35039833188056946, "step": 145 }, { "dpo_loss": 0.6453251242637634, "epoch": 0.8502598016060463, "grad_norm": 14.662823673975787, "learning_rate": 4.924776641419512e-07, "logits": -1.5607432126998901, "logps": -84.04727935791016, "loss": 0.3231, "objective": 0.32859519124031067, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.32859519124031067, "step": 150 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6880838871002197, "eval_logits": -1.6073634624481201, "eval_logps": -92.8072509765625, "eval_loss": 0.40292537212371826, "eval_objective": 0.4087039530277252, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5185950398445129, "eval_regularize": 0.4087039530277252, "eval_runtime": 258.55, "eval_samples_per_second": 22.394, "eval_steps_per_second": 0.936, "step": 150 }, { "dpo_loss": 0.6386777758598328, "epoch": 0.8786017949929145, "grad_norm": 14.231064502358223, "learning_rate": 4.912228958488892e-07, "logits": -1.5854390859603882, "logps": -84.10832214355469, "loss": 0.3257, "objective": 0.3301841616630554, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5916666388511658, "regularize": 0.3301841616630554, "step": 155 }, { "dpo_loss": 0.635101854801178, "epoch": 0.9069437883797827, "grad_norm": 16.2268120086952, "learning_rate": 4.898732434036243e-07, "logits": -1.4904930591583252, "logps": -86.09799194335938, "loss": 0.3107, "objective": 0.32787373661994934, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5458333492279053, "regularize": 0.32787373661994934, "step": 160 }, { "dpo_loss": 0.632634162902832, "epoch": 0.9352857817666509, "grad_norm": 16.041101199008867, "learning_rate": 4.884292376870567e-07, "logits": -1.5242409706115723, "logps": -86.48987579345703, "loss": 0.3212, "objective": 0.3137226700782776, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.574999988079071, "regularize": 0.3137226700782776, "step": 165 }, { "dpo_loss": 0.6288425922393799, "epoch": 0.9636277751535192, "grad_norm": 16.89173365453321, "learning_rate": 4.868914466936037e-07, "logits": -1.5360677242279053, "logps": -86.72618103027344, "loss": 0.3151, "objective": 0.30297866463661194, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5708333253860474, "regularize": 0.30297866463661194, "step": 170 }, { "dpo_loss": 0.6335378289222717, "epoch": 0.9919697685403873, "grad_norm": 15.347989877166441, "learning_rate": 4.852604753077817e-07, "logits": -1.4790997505187988, "logps": -87.8569107055664, "loss": 0.3103, "objective": 0.29884618520736694, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6166666746139526, "regularize": 0.29884618520736694, "step": 175 }, { "dpo_loss": 0.6220327615737915, "epoch": 1.0203117619272555, "grad_norm": 15.022798279596007, "learning_rate": 4.835369650662767e-07, "logits": -1.6809762716293335, "logps": -87.00578308105469, "loss": 0.2902, "objective": 0.3023075461387634, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5874999761581421, "regularize": 0.3023075461387634, "step": 180 }, { "dpo_loss": 0.6156979203224182, "epoch": 1.0486537553141237, "grad_norm": 17.673807880039096, "learning_rate": 4.817215939055985e-07, "logits": -1.54806387424469, "logps": -86.16964721679688, "loss": 0.2856, "objective": 0.30200377106666565, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5416666865348816, "regularize": 0.30200377106666565, "step": 185 }, { "dpo_loss": 0.6105552315711975, "epoch": 1.076995748700992, "grad_norm": 16.47130075175902, "learning_rate": 4.798150758954164e-07, "logits": -1.6065795421600342, "logps": -88.57856750488281, "loss": 0.2661, "objective": 0.23887412250041962, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6041666865348816, "regularize": 0.23887412250041962, "step": 190 }, { "dpo_loss": 0.6094806790351868, "epoch": 1.10533774208786, "grad_norm": 15.979183042956787, "learning_rate": 4.778181609576831e-07, "logits": -1.58108651638031, "logps": -86.33049011230469, "loss": 0.2734, "objective": 0.23615716397762299, "ranking_idealized": 0.7041666507720947, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.23615716397762299, "step": 195 }, { "dpo_loss": 0.6135362982749939, "epoch": 1.1336797354747283, "grad_norm": 15.241345178579065, "learning_rate": 4.757316345716553e-07, "logits": -1.6668376922607422, "logps": -85.64834594726562, "loss": 0.267, "objective": 0.2661064565181732, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5541666746139526, "regularize": 0.2661064565181732, "step": 200 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6866354942321777, "eval_logits": -1.6423935890197754, "eval_logps": -94.7991943359375, "eval_loss": 0.4068562686443329, "eval_objective": 0.41099515557289124, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5185950398445129, "eval_regularize": 0.41099515557289124, "eval_runtime": 258.6603, "eval_samples_per_second": 22.385, "eval_steps_per_second": 0.936, "step": 200 }, { "dpo_loss": 0.6104326844215393, "epoch": 1.1620217288615966, "grad_norm": 16.09148449696529, "learning_rate": 4.735563174649278e-07, "logits": -1.6373622417449951, "logps": -88.22838592529297, "loss": 0.2748, "objective": 0.2591724991798401, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5541666746139526, "regularize": 0.2591724991798401, "step": 205 }, { "dpo_loss": 0.6027090549468994, "epoch": 1.1903637222484649, "grad_norm": 16.78316844909737, "learning_rate": 4.7129306529060407e-07, "logits": -1.604967474937439, "logps": -88.00846099853516, "loss": 0.2647, "objective": 0.28820380568504333, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5541666746139526, "regularize": 0.28820380568504333, "step": 210 }, { "dpo_loss": 0.6120165586471558, "epoch": 1.2187057156353331, "grad_norm": 17.38824297135803, "learning_rate": 4.6894276829072786e-07, "logits": -1.577252745628357, "logps": -88.0232925415039, "loss": 0.2457, "objective": 0.2474772185087204, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5625, "regularize": 0.2474772185087204, "step": 215 }, { "dpo_loss": 0.6058060526847839, "epoch": 1.2470477090222012, "grad_norm": 16.33918535416867, "learning_rate": 4.6650635094610966e-07, "logits": -1.5445390939712524, "logps": -87.7970199584961, "loss": 0.25, "objective": 0.23831520974636078, "ranking_idealized": 0.7208333611488342, "ranking_idealized_expo": 0.6041666865348816, "ranking_simple": 0.637499988079071, "regularize": 0.23831520974636078, "step": 220 }, { "dpo_loss": 0.6021844744682312, "epoch": 1.2753897024090695, "grad_norm": 14.989005292751132, "learning_rate": 4.639847716126854e-07, "logits": -1.6192957162857056, "logps": -89.04407501220703, "loss": 0.2607, "objective": 0.26420968770980835, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6041666865348816, "regularize": 0.26420965790748596, "step": 225 }, { "dpo_loss": 0.6108235120773315, "epoch": 1.3037316957959377, "grad_norm": 16.245321246774985, "learning_rate": 4.6137902214455106e-07, "logits": -1.5698676109313965, "logps": -89.06554412841797, "loss": 0.2454, "objective": 0.24457047879695892, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5541666746139526, "regularize": 0.24457047879695892, "step": 230 }, { "dpo_loss": 0.59207683801651, "epoch": 1.3320736891828058, "grad_norm": 16.42864016636988, "learning_rate": 4.5869012750382004e-07, "logits": -1.6616859436035156, "logps": -87.82197570800781, "loss": 0.2583, "objective": 0.26300859451293945, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6041666865348816, "regularize": 0.26300859451293945, "step": 235 }, { "dpo_loss": 0.5991641283035278, "epoch": 1.360415682569674, "grad_norm": 17.146089761318706, "learning_rate": 4.5591914535745817e-07, "logits": -1.5948702096939087, "logps": -89.31143188476562, "loss": 0.2442, "objective": 0.25130581855773926, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5666666626930237, "regularize": 0.25130581855773926, "step": 240 }, { "dpo_loss": 0.6016849279403687, "epoch": 1.3887576759565423, "grad_norm": 14.997203138603757, "learning_rate": 4.5306716566125433e-07, "logits": -1.6367671489715576, "logps": -88.17431640625, "loss": 0.2399, "objective": 0.23935823142528534, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.23935823142528534, "step": 245 }, { "dpo_loss": 0.5918813943862915, "epoch": 1.4170996693434104, "grad_norm": 15.668454928081044, "learning_rate": 4.501353102310901e-07, "logits": -1.5877238512039185, "logps": -87.66322326660156, "loss": 0.2432, "objective": 0.2531537711620331, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5625, "regularize": 0.2531537711620331, "step": 250 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6876620650291443, "eval_logits": -1.6720653772354126, "eval_logps": -96.13894653320312, "eval_loss": 0.4107522666454315, "eval_objective": 0.4137335419654846, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.51962810754776, "eval_regularize": 0.4137335419654846, "eval_runtime": 259.3309, "eval_samples_per_second": 22.327, "eval_steps_per_second": 0.933, "step": 250 }, { "dpo_loss": 0.5952737927436829, "epoch": 1.4454416627302786, "grad_norm": 16.40280338029817, "learning_rate": 4.471247323016777e-07, "logits": -1.5863794088363647, "logps": -89.24433898925781, "loss": 0.2442, "objective": 0.2290959656238556, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6291666626930237, "regularize": 0.2290959656238556, "step": 255 }, { "dpo_loss": 0.5954132080078125, "epoch": 1.473783656117147, "grad_norm": 16.11674277744465, "learning_rate": 4.440366160729392e-07, "logits": -1.6588572263717651, "logps": -89.44280242919922, "loss": 0.2443, "objective": 0.2354036122560501, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5625, "regularize": 0.2354036122560501, "step": 260 }, { "dpo_loss": 0.5868396759033203, "epoch": 1.5021256495040152, "grad_norm": 16.753008834337265, "learning_rate": 4.4087217624420585e-07, "logits": -1.6106855869293213, "logps": -88.14371490478516, "loss": 0.239, "objective": 0.24956756830215454, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5958333611488342, "regularize": 0.24956756830215454, "step": 265 }, { "dpo_loss": 0.5915893316268921, "epoch": 1.5304676428908834, "grad_norm": 17.579129679111187, "learning_rate": 4.3763265753642055e-07, "logits": -1.6173158884048462, "logps": -90.8720703125, "loss": 0.2421, "objective": 0.24301743507385254, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5958333611488342, "regularize": 0.24301742017269135, "step": 270 }, { "dpo_loss": 0.5958731770515442, "epoch": 1.5588096362777515, "grad_norm": 16.101798479127662, "learning_rate": 4.34319334202531e-07, "logits": -1.6187034845352173, "logps": -90.12999725341797, "loss": 0.2448, "objective": 0.22899790108203888, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6208333373069763, "regularize": 0.22899790108203888, "step": 275 }, { "dpo_loss": 0.5842003226280212, "epoch": 1.5871516296646198, "grad_norm": 15.709789047148108, "learning_rate": 4.309335095262675e-07, "logits": -1.5244942903518677, "logps": -88.604248046875, "loss": 0.2411, "objective": 0.23850402235984802, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6000000238418579, "regularize": 0.23850402235984802, "step": 280 }, { "dpo_loss": 0.5853084921836853, "epoch": 1.615493623051488, "grad_norm": 16.6854633771705, "learning_rate": 4.274765153095007e-07, "logits": -1.6502856016159058, "logps": -89.77727508544922, "loss": 0.2219, "objective": 0.21514521539211273, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6041666865348816, "regularize": 0.21514521539211273, "step": 285 }, { "dpo_loss": 0.5910848379135132, "epoch": 1.643835616438356, "grad_norm": 18.978761606300836, "learning_rate": 4.239497113483819e-07, "logits": -1.7089149951934814, "logps": -86.87386322021484, "loss": 0.2312, "objective": 0.23006680607795715, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6291666626930237, "regularize": 0.23006680607795715, "step": 290 }, { "dpo_loss": 0.5802692174911499, "epoch": 1.6721776098252243, "grad_norm": 16.652074965539576, "learning_rate": 4.203544848984728e-07, "logits": -1.5955086946487427, "logps": -86.49956512451172, "loss": 0.2276, "objective": 0.23742005228996277, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5791666507720947, "regularize": 0.23742005228996277, "step": 295 }, { "dpo_loss": 0.5916833281517029, "epoch": 1.7005196032120926, "grad_norm": 16.884463449554712, "learning_rate": 4.166922501290729e-07, "logits": -1.6546835899353027, "logps": -88.2989730834961, "loss": 0.2252, "objective": 0.26394858956336975, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6208333373069763, "regularize": 0.26394858956336975, "step": 300 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6866207718849182, "eval_logits": -1.6648496389389038, "eval_logps": -95.62443542480469, "eval_loss": 0.410134494304657, "eval_objective": 0.4137687385082245, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5216942429542542, "eval_regularize": 0.4137687385082245, "eval_runtime": 259.4458, "eval_samples_per_second": 22.317, "eval_steps_per_second": 0.933, "step": 300 }, { "dpo_loss": 0.5952399373054504, "epoch": 1.7288615965989607, "grad_norm": 16.24562342201146, "learning_rate": 4.129644475669616e-07, "logits": -1.6116312742233276, "logps": -88.82595825195312, "loss": 0.218, "objective": 0.2242499738931656, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5833333134651184, "regularize": 0.2242499738931656, "step": 305 }, { "dpo_loss": 0.583368182182312, "epoch": 1.7572035899858292, "grad_norm": 18.099666352463437, "learning_rate": 4.0917254352977206e-07, "logits": -1.7004183530807495, "logps": -87.11441040039062, "loss": 0.2283, "objective": 0.2325660139322281, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.612500011920929, "regularize": 0.2325660139322281, "step": 310 }, { "dpo_loss": 0.5933206677436829, "epoch": 1.7855455833726972, "grad_norm": 16.545516113765466, "learning_rate": 4.053180295492202e-07, "logits": -1.602583408355713, "logps": -88.69900512695312, "loss": 0.2287, "objective": 0.21895338594913483, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5833333134651184, "regularize": 0.21895337104797363, "step": 315 }, { "dpo_loss": 0.5876157283782959, "epoch": 1.8138875767595655, "grad_norm": 18.487916312721516, "learning_rate": 4.0140242178441665e-07, "logits": -1.6777514219284058, "logps": -90.22407531738281, "loss": 0.2153, "objective": 0.20208925008773804, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5874999761581421, "regularize": 0.20208925008773804, "step": 320 }, { "dpo_loss": 0.5746586918830872, "epoch": 1.8422295701464337, "grad_norm": 18.670087833334332, "learning_rate": 3.9742726042549053e-07, "logits": -1.7464016675949097, "logps": -91.97502899169922, "loss": 0.219, "objective": 0.2114688903093338, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6416666507720947, "regularize": 0.2114688903093338, "step": 325 }, { "dpo_loss": 0.5767069458961487, "epoch": 1.8705715635333018, "grad_norm": 18.91235181922618, "learning_rate": 3.933941090877615e-07, "logits": -1.466091275215149, "logps": -90.11954498291016, "loss": 0.219, "objective": 0.2171897292137146, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.574999988079071, "regularize": 0.2171897292137146, "step": 330 }, { "dpo_loss": 0.5894278287887573, "epoch": 1.89891355692017, "grad_norm": 15.592599296406116, "learning_rate": 3.8930455419669744e-07, "logits": -1.6301844120025635, "logps": -89.44200134277344, "loss": 0.2112, "objective": 0.18907961249351501, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6000000238418579, "regularize": 0.18907961249351501, "step": 335 }, { "dpo_loss": 0.5853725075721741, "epoch": 1.9272555503070383, "grad_norm": 15.83418724261755, "learning_rate": 3.851602043638994e-07, "logits": -1.660121202468872, "logps": -91.48560333251953, "loss": 0.2026, "objective": 0.18658672273159027, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6416666507720947, "regularize": 0.18658672273159027, "step": 340 }, { "dpo_loss": 0.5825453400611877, "epoch": 1.9555975436939064, "grad_norm": 16.366640560133238, "learning_rate": 3.809626897543604e-07, "logits": -1.657557725906372, "logps": -90.72650909423828, "loss": 0.1961, "objective": 0.18415075540542603, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6166666746139526, "regularize": 0.18415075540542603, "step": 345 }, { "dpo_loss": 0.5838915109634399, "epoch": 1.9839395370807746, "grad_norm": 17.651439137685784, "learning_rate": 3.7671366144524576e-07, "logits": -1.551125407218933, "logps": -91.74525451660156, "loss": 0.2082, "objective": 0.20508398115634918, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5541666746139526, "regularize": 0.20508398115634918, "step": 350 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6863144040107727, "eval_logits": -1.6988588571548462, "eval_logps": -97.52546691894531, "eval_loss": 0.4102429747581482, "eval_objective": 0.4131539762020111, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.51962810754776, "eval_regularize": 0.4131539762020111, "eval_runtime": 258.5165, "eval_samples_per_second": 22.397, "eval_steps_per_second": 0.936, "step": 350 }, { "dpo_loss": 0.5745717287063599, "epoch": 2.012281530467643, "grad_norm": 15.784086525377202, "learning_rate": 3.724147907764478e-07, "logits": -1.5323989391326904, "logps": -90.18486785888672, "loss": 0.2055, "objective": 0.20713359117507935, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6458333134651184, "regularize": 0.20713359117507935, "step": 355 }, { "dpo_loss": 0.5675494074821472, "epoch": 2.040623523854511, "grad_norm": 20.55210866626824, "learning_rate": 3.6806776869317067e-07, "logits": -1.6239458322525024, "logps": -89.69377899169922, "loss": 0.1726, "objective": 0.17787505686283112, "ranking_idealized": 0.7250000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.675000011920929, "regularize": 0.17787505686283112, "step": 360 }, { "dpo_loss": 0.5650666952133179, "epoch": 2.0689655172413794, "grad_norm": 16.544231581396616, "learning_rate": 3.636743050808028e-07, "logits": -1.6872822046279907, "logps": -91.26659393310547, "loss": 0.1866, "objective": 0.16895455121994019, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6333333253860474, "regularize": 0.16895455121994019, "step": 365 }, { "dpo_loss": 0.575705349445343, "epoch": 2.0973075106282475, "grad_norm": 16.758890304778106, "learning_rate": 3.5923612809233984e-07, "logits": -1.662663221359253, "logps": -87.82825469970703, "loss": 0.1679, "objective": 0.1752353459596634, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.574999988079071, "regularize": 0.1752353310585022, "step": 370 }, { "dpo_loss": 0.5633853077888489, "epoch": 2.1256495040151155, "grad_norm": 16.99783941953761, "learning_rate": 3.5475498346862214e-07, "logits": -1.6271302700042725, "logps": -91.13916015625, "loss": 0.1726, "objective": 0.16911908984184265, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.6791666746139526, "regularize": 0.16911907494068146, "step": 375 }, { "dpo_loss": 0.5752108097076416, "epoch": 2.153991497401984, "grad_norm": 17.23340187781712, "learning_rate": 3.502326338516534e-07, "logits": -1.5394021272659302, "logps": -89.99533081054688, "loss": 0.179, "objective": 0.1650255024433136, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.1650255024433136, "step": 380 }, { "dpo_loss": 0.571977972984314, "epoch": 2.182333490788852, "grad_norm": 15.78796183229778, "learning_rate": 3.4567085809127245e-07, "logits": -1.6716177463531494, "logps": -91.3305892944336, "loss": 0.1653, "objective": 0.13291777670383453, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.612500011920929, "regularize": 0.13291777670383453, "step": 385 }, { "dpo_loss": 0.5752423405647278, "epoch": 2.21067548417572, "grad_norm": 18.129151048308177, "learning_rate": 3.4107145054544855e-07, "logits": -1.5358682870864868, "logps": -91.15263366699219, "loss": 0.1744, "objective": 0.16379062831401825, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5625, "regularize": 0.16379062831401825, "step": 390 }, { "dpo_loss": 0.5485681891441345, "epoch": 2.2390174775625886, "grad_norm": 16.313781937896024, "learning_rate": 3.3643622037447767e-07, "logits": -1.5593619346618652, "logps": -92.42921447753906, "loss": 0.1776, "objective": 0.1637614667415619, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6416666507720947, "regularize": 0.1637614667415619, "step": 395 }, { "dpo_loss": 0.5597947239875793, "epoch": 2.2673594709494567, "grad_norm": 16.659127876259, "learning_rate": 3.317669908293554e-07, "logits": -1.631813645362854, "logps": -92.92410278320312, "loss": 0.1825, "objective": 0.196553573012352, "ranking_idealized": 0.7583333253860474, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.6958333253860474, "regularize": 0.1965535581111908, "step": 400 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6862595677375793, "eval_logits": -1.6931663751602173, "eval_logps": -97.79962158203125, "eval_loss": 0.4124037027359009, "eval_objective": 0.4144473969936371, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5206611752510071, "eval_regularize": 0.4144473969936371, "eval_runtime": 258.4529, "eval_samples_per_second": 22.403, "eval_steps_per_second": 0.936, "step": 400 }, { "dpo_loss": 0.5691500902175903, "epoch": 2.295701464336325, "grad_norm": 17.30117286858182, "learning_rate": 3.270655985346081e-07, "logits": -1.7139372825622559, "logps": -89.78938293457031, "loss": 0.1717, "objective": 0.18224166333675385, "ranking_idealized": 0.737500011920929, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6458333134651184, "regularize": 0.18224166333675385, "step": 405 }, { "dpo_loss": 0.5579439997673035, "epoch": 2.324043457723193, "grad_norm": 17.123218301010457, "learning_rate": 3.223338927658632e-07, "logits": -1.5741162300109863, "logps": -91.07009887695312, "loss": 0.1618, "objective": 0.15759395062923431, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5958333611488342, "regularize": 0.15759395062923431, "step": 410 }, { "dpo_loss": 0.5704253315925598, "epoch": 2.3523854511100613, "grad_norm": 17.68931154440285, "learning_rate": 3.175737347224432e-07, "logits": -1.6476367712020874, "logps": -91.30075073242188, "loss": 0.1732, "objective": 0.19281157851219177, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.612500011920929, "regularize": 0.19281157851219177, "step": 415 }, { "dpo_loss": 0.5771389603614807, "epoch": 2.3807274444969297, "grad_norm": 17.274582557860825, "learning_rate": 3.1278699679526975e-07, "logits": -1.5415838956832886, "logps": -92.63572692871094, "loss": 0.1579, "objective": 0.15308959782123566, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5958333611488342, "regularize": 0.15308959782123566, "step": 420 }, { "dpo_loss": 0.566936731338501, "epoch": 2.409069437883798, "grad_norm": 16.445557447346342, "learning_rate": 3.0797556183036575e-07, "logits": -1.5967096090316772, "logps": -91.4622802734375, "loss": 0.1607, "objective": 0.16068215668201447, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6000000238418579, "regularize": 0.16068214178085327, "step": 425 }, { "dpo_loss": 0.5632474422454834, "epoch": 2.4374114312706663, "grad_norm": 15.962055488306607, "learning_rate": 3.0314132238824415e-07, "logits": -1.6247813701629639, "logps": -92.1604995727539, "loss": 0.1547, "objective": 0.1360505074262619, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.625, "regularize": 0.1360505074262619, "step": 430 }, { "dpo_loss": 0.566851019859314, "epoch": 2.4657534246575343, "grad_norm": 16.006081940650837, "learning_rate": 2.982861799994764e-07, "logits": -1.6544443368911743, "logps": -92.63692474365234, "loss": 0.1637, "objective": 0.17756709456443787, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6208333373069763, "regularize": 0.17756709456443787, "step": 435 }, { "dpo_loss": 0.5565729141235352, "epoch": 2.4940954180444024, "grad_norm": 17.37344722468487, "learning_rate": 2.934120444167326e-07, "logits": -1.5883994102478027, "logps": -91.88066101074219, "loss": 0.159, "objective": 0.15150482952594757, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6041666865348816, "regularize": 0.15150482952594757, "step": 440 }, { "dpo_loss": 0.565682590007782, "epoch": 2.5224374114312704, "grad_norm": 18.453788667979182, "learning_rate": 2.885208328635864e-07, "logits": -1.6123565435409546, "logps": -89.5006332397461, "loss": 0.1576, "objective": 0.1587233543395996, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6458333134651184, "regularize": 0.1587233543395996, "step": 445 }, { "dpo_loss": 0.5824019312858582, "epoch": 2.550779404818139, "grad_norm": 19.424550718198045, "learning_rate": 2.83614469280383e-07, "logits": -1.6537593603134155, "logps": -91.4095230102539, "loss": 0.1504, "objective": 0.15120406448841095, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5791666507720947, "regularize": 0.15120406448841095, "step": 450 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6864377856254578, "eval_logits": -1.711348056793213, "eval_logps": -99.202880859375, "eval_loss": 0.41492125391960144, "eval_objective": 0.4176488518714905, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5216942429542542, "eval_regularize": 0.4176488518714905, "eval_runtime": 258.9375, "eval_samples_per_second": 22.361, "eval_steps_per_second": 0.935, "step": 450 }, { "dpo_loss": 0.5587320923805237, "epoch": 2.579121398205007, "grad_norm": 18.174711126742732, "learning_rate": 2.786948835674634e-07, "logits": -1.6923545598983765, "logps": -92.0631103515625, "loss": 0.1514, "objective": 0.15467478334903717, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6416666507720947, "regularize": 0.15467478334903717, "step": 455 }, { "dpo_loss": 0.5529297590255737, "epoch": 2.6074633915918755, "grad_norm": 18.378396938924546, "learning_rate": 2.737640108260456e-07, "logits": -1.765284776687622, "logps": -92.5921401977539, "loss": 0.1544, "objective": 0.13981758058071136, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6291666626930237, "regularize": 0.13981756567955017, "step": 460 }, { "dpo_loss": 0.5604754090309143, "epoch": 2.6358053849787435, "grad_norm": 17.16312208138119, "learning_rate": 2.6882379059705953e-07, "logits": -1.6412590742111206, "logps": -91.83204650878906, "loss": 0.1571, "objective": 0.15992027521133423, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5791666507720947, "regularize": 0.15992026031017303, "step": 465 }, { "dpo_loss": 0.5741956830024719, "epoch": 2.6641473783656116, "grad_norm": 17.444271577746782, "learning_rate": 2.6387616609823504e-07, "logits": -1.6750518083572388, "logps": -91.33477020263672, "loss": 0.151, "objective": 0.17329135537147522, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.625, "regularize": 0.17329135537147522, "step": 470 }, { "dpo_loss": 0.5746079087257385, "epoch": 2.69248937175248, "grad_norm": 17.607595627923466, "learning_rate": 2.5892308345974514e-07, "logits": -1.6217347383499146, "logps": -90.19564819335938, "loss": 0.1521, "objective": 0.1534017026424408, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6166666746139526, "regularize": 0.1534017026424408, "step": 475 }, { "dpo_loss": 0.56805020570755, "epoch": 2.720831365139348, "grad_norm": 18.441983400540806, "learning_rate": 2.53966490958702e-07, "logits": -1.7197903394699097, "logps": -90.20177459716797, "loss": 0.148, "objective": 0.14620445668697357, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6291666626930237, "regularize": 0.14620442688465118, "step": 480 }, { "dpo_loss": 0.5559974312782288, "epoch": 2.7491733585262166, "grad_norm": 17.00220355810742, "learning_rate": 2.4900833825280967e-07, "logits": -1.628369927406311, "logps": -93.048828125, "loss": 0.1488, "objective": 0.1451708972454071, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6583333611488342, "regularize": 0.1451708972454071, "step": 485 }, { "dpo_loss": 0.555105984210968, "epoch": 2.7775153519130846, "grad_norm": 17.798810379621077, "learning_rate": 2.4405057561347313e-07, "logits": -1.647185206413269, "logps": -90.4990463256836, "loss": 0.1613, "objective": 0.17200501263141632, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.6208333373069763, "regularize": 0.17200501263141632, "step": 490 }, { "dpo_loss": 0.5594576001167297, "epoch": 2.8058573452999527, "grad_norm": 18.076540126591944, "learning_rate": 2.39095153158666e-07, "logits": -1.6548616886138916, "logps": -90.19225311279297, "loss": 0.1504, "objective": 0.1365150660276413, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6208333373069763, "regularize": 0.1365150511264801, "step": 495 }, { "dpo_loss": 0.5552747845649719, "epoch": 2.8341993386868207, "grad_norm": 17.278782223651127, "learning_rate": 2.3414402008585886e-07, "logits": -1.6857832670211792, "logps": -89.0853500366211, "loss": 0.1494, "objective": 0.15246258676052094, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6416666507720947, "regularize": 0.15246258676052094, "step": 500 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6861580014228821, "eval_logits": -1.7174702882766724, "eval_logps": -99.17545318603516, "eval_loss": 0.41525644063949585, "eval_objective": 0.4182237386703491, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5227272510528564, "eval_regularize": 0.4182237386703491, "eval_runtime": 259.2438, "eval_samples_per_second": 22.334, "eval_steps_per_second": 0.933, "step": 500 }, { "dpo_loss": 0.5660989284515381, "epoch": 2.862541332073689, "grad_norm": 18.182680782212074, "learning_rate": 2.2919912390530943e-07, "logits": -1.6143929958343506, "logps": -91.0888900756836, "loss": 0.1437, "objective": 0.16082407534122467, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6333333253860474, "regularize": 0.16082406044006348, "step": 505 }, { "dpo_loss": 0.5675150752067566, "epoch": 2.8908833254605573, "grad_norm": 16.373132303441977, "learning_rate": 2.2426240967401638e-07, "logits": -1.5807684659957886, "logps": -91.39689636230469, "loss": 0.1433, "objective": 0.1494457870721817, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6291666626930237, "regularize": 0.1494457870721817, "step": 510 }, { "dpo_loss": 0.5627566576004028, "epoch": 2.9192253188474258, "grad_norm": 18.008132213394468, "learning_rate": 2.1933581923063837e-07, "logits": -1.7557440996170044, "logps": -91.32353210449219, "loss": 0.1448, "objective": 0.13260915875434875, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.6291666626930237, "regularize": 0.13260914385318756, "step": 515 }, { "dpo_loss": 0.5646940469741821, "epoch": 2.947567312234294, "grad_norm": 17.30767973762921, "learning_rate": 2.1442129043167873e-07, "logits": -1.610668420791626, "logps": -92.7865219116211, "loss": 0.1368, "objective": 0.11772733181715012, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6291666626930237, "regularize": 0.11772733181715012, "step": 520 }, { "dpo_loss": 0.5658089518547058, "epoch": 2.975909305621162, "grad_norm": 18.116492800551395, "learning_rate": 2.0952075638923652e-07, "logits": -1.6272333860397339, "logps": -92.43870544433594, "loss": 0.1424, "objective": 0.15236981213092804, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6416666507720947, "regularize": 0.15236981213092804, "step": 525 }, { "dpo_loss": 0.5534684658050537, "epoch": 3.0042512990080303, "grad_norm": 18.337044286762765, "learning_rate": 2.0463614471062435e-07, "logits": -1.6210473775863647, "logps": -91.47294616699219, "loss": 0.1502, "objective": 0.17477649450302124, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6291666626930237, "regularize": 0.17477649450302124, "step": 530 }, { "dpo_loss": 0.5659457445144653, "epoch": 3.0325932923948984, "grad_norm": 16.444884726429134, "learning_rate": 1.9976937674015026e-07, "logits": -1.6844907999038696, "logps": -93.2222671508789, "loss": 0.1284, "objective": 0.14268328249454498, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6583333611488342, "regularize": 0.14268328249454498, "step": 535 }, { "dpo_loss": 0.5521051287651062, "epoch": 3.0609352857817664, "grad_norm": 19.963683437356444, "learning_rate": 1.9492236680336483e-07, "logits": -1.7760847806930542, "logps": -90.89082336425781, "loss": 0.1216, "objective": 0.10329335182905197, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6541666388511658, "regularize": 0.10329335182905197, "step": 540 }, { "dpo_loss": 0.5619763731956482, "epoch": 3.089277279168635, "grad_norm": 17.450130382767895, "learning_rate": 1.9009702145406724e-07, "logits": -1.6995065212249756, "logps": -92.40625, "loss": 0.1232, "objective": 0.1230437308549881, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.6000000238418579, "regularize": 0.1230437308549881, "step": 545 }, { "dpo_loss": 0.5528106689453125, "epoch": 3.117619272555503, "grad_norm": 18.245098236126562, "learning_rate": 1.8529523872436977e-07, "logits": -1.5086556673049927, "logps": -92.30103302001953, "loss": 0.1407, "objective": 0.12957319617271423, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6791666746139526, "regularize": 0.12957318127155304, "step": 550 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 0.6856257915496826, "eval_logits": -1.7183054685592651, "eval_logps": -99.2997055053711, "eval_loss": 0.4161340296268463, "eval_objective": 0.41743505001068115, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5216942429542542, "eval_regularize": 0.41743505001068115, "eval_runtime": 258.7783, "eval_samples_per_second": 22.374, "eval_steps_per_second": 0.935, "step": 550 }, { "dpo_loss": 0.5473430752754211, "epoch": 3.1459612659423715, "grad_norm": 18.87722095427309, "learning_rate": 1.8051890737811393e-07, "logits": -1.6218358278274536, "logps": -93.05738067626953, "loss": 0.1336, "objective": 0.1305130124092102, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6625000238418579, "regularize": 0.1305130124092102, "step": 555 }, { "dpo_loss": 0.5478367209434509, "epoch": 3.1743032593292395, "grad_norm": 22.714698597290123, "learning_rate": 1.7576990616793137e-07, "logits": -1.601859211921692, "logps": -90.21554565429688, "loss": 0.1212, "objective": 0.10795855522155762, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.637499988079071, "regularize": 0.10795855522155762, "step": 560 }, { "dpo_loss": 0.5566601157188416, "epoch": 3.2026452527161076, "grad_norm": 24.322678833478967, "learning_rate": 1.710501030962438e-07, "logits": -1.663177728652954, "logps": -91.7726058959961, "loss": 0.1298, "objective": 0.13216590881347656, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6458333134651184, "regularize": 0.13216587901115417, "step": 565 }, { "dpo_loss": 0.5519458055496216, "epoch": 3.230987246102976, "grad_norm": 19.102063233264193, "learning_rate": 1.663613546804912e-07, "logits": -1.5763607025146484, "logps": -91.98208618164062, "loss": 0.1293, "objective": 0.13738204538822174, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5916666388511658, "regularize": 0.13738203048706055, "step": 570 }, { "dpo_loss": 0.5559364557266235, "epoch": 3.259329239489844, "grad_norm": 16.15481429380041, "learning_rate": 1.617055052228768e-07, "logits": -1.6705526113510132, "logps": -92.17435455322266, "loss": 0.1266, "objective": 0.12801046669483185, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6499999761581421, "regularize": 0.12801046669483185, "step": 575 }, { "dpo_loss": 0.5649384260177612, "epoch": 3.287671232876712, "grad_norm": 17.44743081337015, "learning_rate": 1.5708438608491815e-07, "logits": -1.6591442823410034, "logps": -93.50952911376953, "loss": 0.1277, "objective": 0.11801984906196594, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6291666626930237, "regularize": 0.11801984906196594, "step": 580 }, { "dpo_loss": 0.5595548152923584, "epoch": 3.3160132262635806, "grad_norm": 16.72082331684023, "learning_rate": 1.524998149670871e-07, "logits": -1.69523286819458, "logps": -93.74117279052734, "loss": 0.12, "objective": 0.10769928246736526, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6583333611488342, "regularize": 0.10769927501678467, "step": 585 }, { "dpo_loss": 0.561581015586853, "epoch": 3.3443552196504487, "grad_norm": 19.465809423510365, "learning_rate": 1.479535951938243e-07, "logits": -1.7049933671951294, "logps": -93.83617401123047, "loss": 0.1205, "objective": 0.09809862077236176, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.637499988079071, "regularize": 0.09809862077236176, "step": 590 }, { "dpo_loss": 0.5538628101348877, "epoch": 3.372697213037317, "grad_norm": 17.81052400873953, "learning_rate": 1.43447515004208e-07, "logits": -1.613613247871399, "logps": -92.85578155517578, "loss": 0.1191, "objective": 0.12334737926721573, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.637499988079071, "regularize": 0.12334737926721573, "step": 595 }, { "dpo_loss": 0.5357978940010071, "epoch": 3.4010392064241852, "grad_norm": 18.626853535104544, "learning_rate": 1.3898334684855645e-07, "logits": -1.624743938446045, "logps": -92.40316009521484, "loss": 0.1149, "objective": 0.13463754951953888, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6458333134651184, "regularize": 0.13463754951953888, "step": 600 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 0.6852067112922668, "eval_logits": -1.71807062625885, "eval_logps": -99.92455291748047, "eval_loss": 0.41705650091171265, "eval_objective": 0.41811424493789673, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.41811424493789673, "eval_runtime": 259.0859, "eval_samples_per_second": 22.348, "eval_steps_per_second": 0.934, "step": 600 }, { "dpo_loss": 0.5652448534965515, "epoch": 3.4293811998110533, "grad_norm": 17.958926430591173, "learning_rate": 1.3456284669124157e-07, "logits": -1.6740020513534546, "logps": -94.55862426757812, "loss": 0.1179, "objective": 0.11572790890932083, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.625, "regularize": 0.11572788655757904, "step": 605 }, { "dpo_loss": 0.5543821454048157, "epoch": 3.4577231931979218, "grad_norm": 17.326848783729876, "learning_rate": 1.301877533199859e-07, "logits": -1.6315828561782837, "logps": -92.49845886230469, "loss": 0.1149, "objective": 0.1067105308175087, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6499999761581421, "regularize": 0.1067105159163475, "step": 610 }, { "dpo_loss": 0.5443283915519714, "epoch": 3.48606518658479, "grad_norm": 16.84586393500809, "learning_rate": 1.2585978766191724e-07, "logits": -1.664933681488037, "logps": -93.27455139160156, "loss": 0.1142, "objective": 0.10945113748311996, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5958333611488342, "regularize": 0.10945113748311996, "step": 615 }, { "dpo_loss": 0.5419160723686218, "epoch": 3.514407179971658, "grad_norm": 17.32874521556865, "learning_rate": 1.2158065210664848e-07, "logits": -1.5332224369049072, "logps": -92.34308624267578, "loss": 0.1203, "objective": 0.12084861099720001, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6166666746139526, "regularize": 0.12084860354661942, "step": 620 }, { "dpo_loss": 0.5541211366653442, "epoch": 3.5427491733585263, "grad_norm": 17.48530471086995, "learning_rate": 1.1735202983664802e-07, "logits": -1.6171096563339233, "logps": -91.3125991821289, "loss": 0.1178, "objective": 0.11125477403402328, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6416666507720947, "regularize": 0.11125477403402328, "step": 625 }, { "dpo_loss": 0.5698776245117188, "epoch": 3.5710911667453944, "grad_norm": 17.864701578880954, "learning_rate": 1.1317558416516696e-07, "logits": -1.697689175605774, "logps": -91.67240905761719, "loss": 0.1261, "objective": 0.13253255188465118, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6083333492279053, "regularize": 0.13253255188465118, "step": 630 }, { "dpo_loss": 0.5599467158317566, "epoch": 3.5994331601322624, "grad_norm": 17.33519253157568, "learning_rate": 1.090529578819799e-07, "logits": -1.6461411714553833, "logps": -91.57376098632812, "loss": 0.1157, "objective": 0.10732007026672363, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6208333373069763, "regularize": 0.10732006281614304, "step": 635 }, { "dpo_loss": 0.540539562702179, "epoch": 3.627775153519131, "grad_norm": 17.025667462047203, "learning_rate": 1.0498577260720048e-07, "logits": -1.5717778205871582, "logps": -93.14022827148438, "loss": 0.1146, "objective": 0.13500064611434937, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.574999988079071, "regularize": 0.13500064611434937, "step": 640 }, { "dpo_loss": 0.5469278693199158, "epoch": 3.656117146905999, "grad_norm": 17.536092815770388, "learning_rate": 1.0097562815342214e-07, "logits": -1.6058826446533203, "logps": -90.76680755615234, "loss": 0.1144, "objective": 0.1191474050283432, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6291666626930237, "regularize": 0.1191474050283432, "step": 645 }, { "dpo_loss": 0.5611483454704285, "epoch": 3.6844591402928675, "grad_norm": 17.646206320924833, "learning_rate": 9.702410189643836e-08, "logits": -1.6121342182159424, "logps": -92.83375549316406, "loss": 0.1108, "objective": 0.09943919628858566, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6083333492279053, "regularize": 0.09943918883800507, "step": 650 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 0.6852837800979614, "eval_logits": -1.7315040826797485, "eval_logps": -99.91177368164062, "eval_loss": 0.41784536838531494, "eval_objective": 0.41884875297546387, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.41884875297546387, "eval_runtime": 259.4431, "eval_samples_per_second": 22.317, "eval_steps_per_second": 0.933, "step": 650 }, { "dpo_loss": 0.5510907769203186, "epoch": 3.7128011336797355, "grad_norm": 17.84028807284025, "learning_rate": 9.313274815478698e-08, "logits": -1.6280105113983154, "logps": -92.27388763427734, "loss": 0.117, "objective": 0.10249165445566177, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6291666626930237, "regularize": 0.10249165445566177, "step": 655 }, { "dpo_loss": 0.5551621913909912, "epoch": 3.7411431270666036, "grad_norm": 17.863904309670215, "learning_rate": 8.930309757836516e-08, "logits": -1.7605994939804077, "logps": -92.74076080322266, "loss": 0.1162, "objective": 0.11682406812906265, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6625000238418579, "regularize": 0.11682406812906265, "step": 660 }, { "dpo_loss": 0.5553780198097229, "epoch": 3.769485120453472, "grad_norm": 17.287090327509993, "learning_rate": 8.553665654635342e-08, "logits": -1.6500779390335083, "logps": -92.00687408447266, "loss": 0.116, "objective": 0.11367592960596085, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6541666388511658, "regularize": 0.11367591470479965, "step": 665 }, { "dpo_loss": 0.5561904311180115, "epoch": 3.79782711384034, "grad_norm": 18.08647866984471, "learning_rate": 8.183490657468686e-08, "logits": -1.7430044412612915, "logps": -92.36637878417969, "loss": 0.1153, "objective": 0.13086958229541779, "ranking_idealized": 0.7333333492279053, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.7124999761581421, "regularize": 0.13086958229541779, "step": 670 }, { "dpo_loss": 0.5525475740432739, "epoch": 3.826169107227208, "grad_norm": 17.678219555630616, "learning_rate": 7.819930373330669e-08, "logits": -1.6892848014831543, "logps": -91.58055114746094, "loss": 0.1165, "objective": 0.10088498890399933, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6416666507720947, "regularize": 0.10088498145341873, "step": 675 }, { "dpo_loss": 0.5708147883415222, "epoch": 3.8545111006140766, "grad_norm": 17.208804705028182, "learning_rate": 7.463127807341966e-08, "logits": -1.6462949514389038, "logps": -92.41487884521484, "loss": 0.1181, "objective": 0.11868777871131897, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6208333373069763, "regularize": 0.11868777871131897, "step": 680 }, { "dpo_loss": 0.5442604422569275, "epoch": 3.8828530940009447, "grad_norm": 18.600166194890996, "learning_rate": 7.113223306499336e-08, "logits": -1.7259678840637207, "logps": -91.63528442382812, "loss": 0.1127, "objective": 0.10472600162029266, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6499999761581421, "regularize": 0.10472600162029266, "step": 685 }, { "dpo_loss": 0.5496628284454346, "epoch": 3.9111950873878127, "grad_norm": 17.80496655704031, "learning_rate": 6.770354504470574e-08, "logits": -1.6540542840957642, "logps": -90.78262329101562, "loss": 0.1164, "objective": 0.10735266655683517, "ranking_idealized": 0.7041666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6875, "regularize": 0.10735265165567398, "step": 690 }, { "dpo_loss": 0.5639461874961853, "epoch": 3.9395370807746812, "grad_norm": 16.492816826616206, "learning_rate": 6.434656267456842e-08, "logits": -1.6047898530960083, "logps": -92.38011932373047, "loss": 0.1193, "objective": 0.12910698354244232, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.12910698354244232, "step": 695 }, { "dpo_loss": 0.5509793162345886, "epoch": 3.9678790741615493, "grad_norm": 19.17114576937693, "learning_rate": 6.106260641143546e-08, "logits": -1.6564711332321167, "logps": -92.65071868896484, "loss": 0.1146, "objective": 0.1030283123254776, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5958333611488342, "regularize": 0.103028304874897, "step": 700 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 0.6854080557823181, "eval_logits": -1.7319272756576538, "eval_logps": -99.89824676513672, "eval_loss": 0.4175797998905182, "eval_objective": 0.4186650514602661, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.4186650514602661, "eval_runtime": 258.7065, "eval_samples_per_second": 22.381, "eval_steps_per_second": 0.935, "step": 700 }, { "dpo_loss": 0.5571620464324951, "epoch": 3.9962210675484178, "grad_norm": 18.74884098276965, "learning_rate": 5.7852967987606e-08, "logits": -1.554320216178894, "logps": -90.9109878540039, "loss": 0.1135, "objective": 0.12702669203281403, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6583333611488342, "regularize": 0.12702666223049164, "step": 705 }, { "dpo_loss": 0.5528541803359985, "epoch": 4.024563060935286, "grad_norm": 17.40621172754528, "learning_rate": 5.471890990272665e-08, "logits": -1.6468113660812378, "logps": -92.61128997802734, "loss": 0.1135, "objective": 0.1373264044523239, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.612500011920929, "regularize": 0.13732638955116272, "step": 710 }, { "dpo_loss": 0.5556226968765259, "epoch": 4.052905054322154, "grad_norm": 18.812501686123863, "learning_rate": 5.166166492719124e-08, "logits": -1.6049120426177979, "logps": -92.74799346923828, "loss": 0.1035, "objective": 0.10493499785661697, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6458333134651184, "regularize": 0.10493497550487518, "step": 715 }, { "dpo_loss": 0.5424375534057617, "epoch": 4.081247047709022, "grad_norm": 17.435339866299028, "learning_rate": 4.868243561723534e-08, "logits": -1.511703372001648, "logps": -94.71248626708984, "loss": 0.0978, "objective": 0.09467672556638718, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6166666746139526, "regularize": 0.09467671811580658, "step": 720 }, { "dpo_loss": 0.5652304291725159, "epoch": 4.109589041095891, "grad_norm": 17.507850551258127, "learning_rate": 4.578239384191529e-08, "logits": -1.6384118795394897, "logps": -92.1180191040039, "loss": 0.1076, "objective": 0.09941933304071426, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.637499988079071, "regularize": 0.09941932559013367, "step": 725 }, { "dpo_loss": 0.5497789978981018, "epoch": 4.137931034482759, "grad_norm": 18.803004868454263, "learning_rate": 4.296268032215733e-08, "logits": -1.7138111591339111, "logps": -91.83662414550781, "loss": 0.1086, "objective": 0.10822432488203049, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6208333373069763, "regularize": 0.10822432488203049, "step": 730 }, { "dpo_loss": 0.55525803565979, "epoch": 4.166273027869627, "grad_norm": 18.177348382836357, "learning_rate": 4.022440418205944e-08, "logits": -1.6232236623764038, "logps": -93.14463806152344, "loss": 0.1028, "objective": 0.10451411455869675, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6541666388511658, "regularize": 0.10451411455869675, "step": 735 }, { "dpo_loss": 0.535234808921814, "epoch": 4.194615021256495, "grad_norm": 17.133543410858152, "learning_rate": 3.756864251262143e-08, "logits": -1.610323190689087, "logps": -93.36137390136719, "loss": 0.1062, "objective": 0.10550294071435928, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6541666388511658, "regularize": 0.10550292581319809, "step": 740 }, { "dpo_loss": 0.546442449092865, "epoch": 4.222957014643363, "grad_norm": 18.78547392108143, "learning_rate": 3.4996439948074855e-08, "logits": -1.6879092454910278, "logps": -90.12301635742188, "loss": 0.1001, "objective": 0.10297367721796036, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6208333373069763, "regularize": 0.10297366231679916, "step": 745 }, { "dpo_loss": 0.5416663289070129, "epoch": 4.251299008030231, "grad_norm": 19.462231175744662, "learning_rate": 3.250880825498026e-08, "logits": -1.8104737997055054, "logps": -92.32807922363281, "loss": 0.0986, "objective": 0.10099396854639053, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6291666626930237, "regularize": 0.10099395364522934, "step": 750 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 0.6853212714195251, "eval_logits": -1.7322306632995605, "eval_logps": -99.86943054199219, "eval_loss": 0.41747406125068665, "eval_objective": 0.41828420758247375, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.41828420758247375, "eval_runtime": 258.964, "eval_samples_per_second": 22.358, "eval_steps_per_second": 0.934, "step": 750 }, { "dpo_loss": 0.5468146800994873, "epoch": 4.2796410014171, "grad_norm": 18.270752967286892, "learning_rate": 3.010672593425209e-08, "logits": -1.7138711214065552, "logps": -92.11996459960938, "loss": 0.1147, "objective": 0.10049024224281311, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6458333134651184, "regularize": 0.10049023479223251, "step": 755 }, { "dpo_loss": 0.5455428957939148, "epoch": 4.307982994803968, "grad_norm": 17.285027330879423, "learning_rate": 2.7791137836269158e-08, "logits": -1.6757961511611938, "logps": -93.05391693115234, "loss": 0.0961, "objective": 0.10086000710725784, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6833333373069763, "regularize": 0.10086000710725784, "step": 760 }, { "dpo_loss": 0.5395826101303101, "epoch": 4.336324988190836, "grad_norm": 17.680760382133624, "learning_rate": 2.556295478922116e-08, "logits": -1.7200431823730469, "logps": -93.1734619140625, "loss": 0.1053, "objective": 0.12091321498155594, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6833333373069763, "regularize": 0.12091320008039474, "step": 765 }, { "dpo_loss": 0.5380468964576721, "epoch": 4.364666981577704, "grad_norm": 17.361908211383366, "learning_rate": 2.3423053240837514e-08, "logits": -1.577264428138733, "logps": -91.18030548095703, "loss": 0.1064, "objective": 0.11028440296649933, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.637499988079071, "regularize": 0.11028438061475754, "step": 770 }, { "dpo_loss": 0.5494747161865234, "epoch": 4.393008974964572, "grad_norm": 16.640977636984772, "learning_rate": 2.137227491364016e-08, "logits": -1.627792239189148, "logps": -91.97000885009766, "loss": 0.1067, "objective": 0.10595239698886871, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6333333253860474, "regularize": 0.10595235228538513, "step": 775 }, { "dpo_loss": 0.5575358867645264, "epoch": 4.42135096835144, "grad_norm": 17.112345268128863, "learning_rate": 1.9411426473854687e-08, "logits": -1.693690538406372, "logps": -90.5418472290039, "loss": 0.1004, "objective": 0.10788667947053909, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6208333373069763, "regularize": 0.10788667947053909, "step": 780 }, { "dpo_loss": 0.5676646828651428, "epoch": 4.449692961738309, "grad_norm": 16.657419373072543, "learning_rate": 1.7541279214111275e-08, "logits": -1.7215303182601929, "logps": -90.63499450683594, "loss": 0.1128, "objective": 0.1177934780716896, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.612500011920929, "regularize": 0.1177934780716896, "step": 785 }, { "dpo_loss": 0.539345920085907, "epoch": 4.478034955125177, "grad_norm": 17.089680176209615, "learning_rate": 1.57625687500596e-08, "logits": -1.6345340013504028, "logps": -93.4063720703125, "loss": 0.0998, "objective": 0.10817180573940277, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6916666626930237, "regularize": 0.10817176848649979, "step": 790 }, { "dpo_loss": 0.547528088092804, "epoch": 4.506376948512045, "grad_norm": 17.320194779122446, "learning_rate": 1.4075994731016894e-08, "logits": -1.5627334117889404, "logps": -93.30286407470703, "loss": 0.1058, "objective": 0.102629154920578, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6291666626930237, "regularize": 0.10262913256883621, "step": 795 }, { "dpo_loss": 0.5486911535263062, "epoch": 4.534718941898913, "grad_norm": 17.9229448629881, "learning_rate": 1.2482220564763667e-08, "logits": -1.5870776176452637, "logps": -92.80538940429688, "loss": 0.1042, "objective": 0.11460768431425095, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6499999761581421, "regularize": 0.11460768431425095, "step": 800 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 0.6852768659591675, "eval_logits": -1.7317209243774414, "eval_logps": -99.85995483398438, "eval_loss": 0.4175398349761963, "eval_objective": 0.4183157980442047, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.4183157980442047, "eval_runtime": 259.0856, "eval_samples_per_second": 22.348, "eval_steps_per_second": 0.934, "step": 800 }, { "dpo_loss": 0.5488670468330383, "epoch": 4.563060935285781, "grad_norm": 18.799592478883184, "learning_rate": 1.0981873156594379e-08, "logits": -1.627816081047058, "logps": -91.32179260253906, "loss": 0.1001, "objective": 0.10988225042819977, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.6041666865348816, "regularize": 0.10988224297761917, "step": 805 }, { "dpo_loss": 0.5488799810409546, "epoch": 4.59140292867265, "grad_norm": 16.566296936758206, "learning_rate": 9.575542662726754e-09, "logits": -1.7243562936782837, "logps": -91.10765075683594, "loss": 0.0996, "objective": 0.08869278430938721, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5833333134651184, "regularize": 0.08869277685880661, "step": 810 }, { "dpo_loss": 0.5502530336380005, "epoch": 4.619744922059518, "grad_norm": 16.948787644578637, "learning_rate": 8.263782258165819e-09, "logits": -1.5700196027755737, "logps": -92.37843322753906, "loss": 0.0991, "objective": 0.07977009564638138, "ranking_idealized": 0.7250000238418579, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.6958333253860474, "regularize": 0.07977008074522018, "step": 815 }, { "dpo_loss": 0.5550402402877808, "epoch": 4.648086915446386, "grad_norm": 21.501267763535818, "learning_rate": 7.047107919114586e-09, "logits": -1.6636712551116943, "logps": -92.61454010009766, "loss": 0.0982, "objective": 0.08367303013801575, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6708333492279053, "regularize": 0.08367302268743515, "step": 820 }, { "dpo_loss": 0.5588962435722351, "epoch": 4.6764289088332545, "grad_norm": 16.315710057694485, "learning_rate": 5.925998220016659e-09, "logits": -1.5499807596206665, "logps": -90.22130584716797, "loss": 0.1018, "objective": 0.09301813691854477, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5833333134651184, "regularize": 0.09301812201738358, "step": 825 }, { "dpo_loss": 0.5498708486557007, "epoch": 4.7047709022201225, "grad_norm": 17.748669049129045, "learning_rate": 4.9008941453107525e-09, "logits": -1.7388263940811157, "logps": -92.17695617675781, "loss": 0.1092, "objective": 0.11468993872404099, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6416666507720947, "regularize": 0.1146899089217186, "step": 830 }, { "dpo_loss": 0.5327169299125671, "epoch": 4.733112895606991, "grad_norm": 17.389468155390862, "learning_rate": 3.9721989159709754e-09, "logits": -1.6580873727798462, "logps": -92.22929382324219, "loss": 0.1, "objective": 0.09779965132474899, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6041666865348816, "regularize": 0.0977996364235878, "step": 835 }, { "dpo_loss": 0.5324665904045105, "epoch": 4.7614548889938595, "grad_norm": 18.51697686947663, "learning_rate": 3.140277830901428e-09, "logits": -1.6570351123809814, "logps": -92.75865173339844, "loss": 0.1079, "objective": 0.11856434494256973, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6499999761581421, "regularize": 0.11856433004140854, "step": 840 }, { "dpo_loss": 0.560818612575531, "epoch": 4.7897968823807275, "grad_norm": 17.858581136510683, "learning_rate": 2.4054581232470785e-09, "logits": -1.6901015043258667, "logps": -92.13179016113281, "loss": 0.1067, "objective": 0.10368030518293381, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.6208333373069763, "regularize": 0.10368029028177261, "step": 845 }, { "dpo_loss": 0.5496495366096497, "epoch": 4.818138875767596, "grad_norm": 17.640915580271592, "learning_rate": 1.7680288316779256e-09, "logits": -1.6190950870513916, "logps": -90.9464340209961, "loss": 0.103, "objective": 0.10453298687934875, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6333333253860474, "regularize": 0.10453297942876816, "step": 850 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 0.6852567791938782, "eval_logits": -1.732380986213684, "eval_logps": -99.89720153808594, "eval_loss": 0.4175875782966614, "eval_objective": 0.4183763563632965, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5237603187561035, "eval_regularize": 0.4183763563632965, "eval_runtime": 259.1903, "eval_samples_per_second": 22.339, "eval_steps_per_second": 0.934, "step": 850 }, { "dpo_loss": 0.5584205389022827, "epoch": 4.846480869154464, "grad_norm": 17.56984774908714, "learning_rate": 1.2282406866966078e-09, "logits": -1.6185228824615479, "logps": -91.83565521240234, "loss": 0.0974, "objective": 0.09289266169071198, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.675000011920929, "regularize": 0.09289265424013138, "step": 855 }, { "dpo_loss": 0.5458131432533264, "epoch": 4.874822862541333, "grad_norm": 17.337457908328606, "learning_rate": 7.863060120144316e-10, "logits": -1.5824497938156128, "logps": -91.32083892822266, "loss": 0.0959, "objective": 0.11178465932607651, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6791666746139526, "regularize": 0.11178465187549591, "step": 860 }, { "dpo_loss": 0.555813729763031, "epoch": 4.903164855928201, "grad_norm": 16.568497702615847, "learning_rate": 4.4239864103465254e-10, "logits": -1.6553268432617188, "logps": -90.40623474121094, "loss": 0.1003, "objective": 0.1271737664937973, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6041666865348816, "regularize": 0.1271737515926361, "step": 865 }, { "dpo_loss": 0.5469813942909241, "epoch": 4.931506849315069, "grad_norm": 17.67813635168232, "learning_rate": 1.966538484758362e-10, "logits": -1.7142003774642944, "logps": -92.42487335205078, "loss": 0.1053, "objective": 0.10629518330097198, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6625000238418579, "regularize": 0.1062951609492302, "step": 870 }, { "dpo_loss": 0.5456808805465698, "epoch": 4.959848842701937, "grad_norm": 17.372715214830695, "learning_rate": 4.9168297161839014e-11, "logits": -1.6318602561950684, "logps": -92.3662109375, "loss": 0.1017, "objective": 0.08666170388460159, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6541666388511658, "regularize": 0.086661696434021, "step": 875 }, { "dpo_loss": 0.5451498627662659, "epoch": 4.988190836088805, "grad_norm": 17.608890670600516, "learning_rate": 0.0, "logits": -1.7033004760742188, "logps": -92.65689849853516, "loss": 0.0938, "objective": 0.09012699872255325, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6541666388511658, "regularize": 0.09012699127197266, "step": 880 }, { "epoch": 4.988190836088805, "step": 880, "total_flos": 0.0, "train_loss": 0.19899855256080629, "train_runtime": 35117.4941, "train_samples_per_second": 7.233, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 880, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }