diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,2969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 880, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.005668398677373642, + "grad_norm": 13.413600039235007, + "learning_rate": 5.681818181818181e-09, + "logits": -1.3147305250167847, + "logps": -88.0877456665039, + "loss": 0.4113, + "objective": 0.41588976979255676, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.41588976979255676, + "step": 1 + }, + { + "dpo_loss": 0.6931512951850891, + "epoch": 0.02834199338686821, + "grad_norm": 13.318320815280419, + "learning_rate": 2.8409090909090908e-08, + "logits": -1.3678321838378906, + "logps": -84.44427490234375, + "loss": 0.4131, + "objective": 0.3755509555339813, + "ranking_idealized": 0.6510416865348816, + "ranking_idealized_expo": 0.5572916865348816, + "ranking_simple": 0.546875, + "regularize": 0.3755509555339813, + "step": 5 + }, + { + "dpo_loss": 0.6927531361579895, + "epoch": 0.05668398677373642, + "grad_norm": 13.050623089340824, + "learning_rate": 5.6818181818181815e-08, + "logits": -1.4463988542556763, + "logps": -83.39988708496094, + "loss": 0.4176, + "objective": 0.4423220753669739, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.4423220753669739, + "step": 10 + }, + { + "dpo_loss": 0.6927918195724487, + "epoch": 0.08502598016060463, + "grad_norm": 12.549385306441062, + "learning_rate": 8.522727272727271e-08, + "logits": -1.4107797145843506, + "logps": -83.50421905517578, + "loss": 0.4254, + "objective": 0.41179904341697693, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.41179904341697693, + "step": 15 + }, + { + "dpo_loss": 0.6924694776535034, + "epoch": 0.11336797354747284, + "grad_norm": 13.269620119946596, + "learning_rate": 1.1363636363636363e-07, + "logits": -1.4003115892410278, + "logps": -84.06736755371094, + "loss": 0.4149, + "objective": 0.40317121148109436, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.40317121148109436, + "step": 20 + }, + { + "dpo_loss": 0.6906281113624573, + "epoch": 0.14170996693434104, + "grad_norm": 12.65234373247132, + "learning_rate": 1.4204545454545455e-07, + "logits": -1.4490704536437988, + "logps": -83.72380065917969, + "loss": 0.412, + "objective": 0.4304184317588806, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.4304184317588806, + "step": 25 + }, + { + "dpo_loss": 0.6906370520591736, + "epoch": 0.17005196032120926, + "grad_norm": 13.419812147505471, + "learning_rate": 1.7045454545454543e-07, + "logits": -1.4248003959655762, + "logps": -84.09757232666016, + "loss": 0.4126, + "objective": 0.41593801975250244, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5333333611488342, + "regularize": 0.41593801975250244, + "step": 30 + }, + { + "dpo_loss": 0.6881809234619141, + "epoch": 0.19839395370807747, + "grad_norm": 13.431894879328123, + "learning_rate": 1.9886363636363636e-07, + "logits": -1.398374319076538, + "logps": -82.60546112060547, + "loss": 0.4095, + "objective": 0.3929609954357147, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.3929609954357147, + "step": 35 + }, + { + "dpo_loss": 0.6868489384651184, + "epoch": 0.22673594709494568, + "grad_norm": 15.0250838416837, + "learning_rate": 2.2727272727272726e-07, + "logits": -1.3904410600662231, + "logps": -82.84651947021484, + "loss": 0.42, + "objective": 0.43919187784194946, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5166666507720947, + "regularize": 0.43919187784194946, + "step": 40 + }, + { + "dpo_loss": 0.6844364404678345, + "epoch": 0.25507794048181387, + "grad_norm": 13.128806663839857, + "learning_rate": 2.5568181818181816e-07, + "logits": -1.5230154991149902, + "logps": -84.21646118164062, + "loss": 0.4194, + "objective": 0.4717731177806854, + "ranking_idealized": 0.7208333611488342, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5458333492279053, + "regularize": 0.4717731177806854, + "step": 45 + }, + { + "dpo_loss": 0.6831071376800537, + "epoch": 0.2834199338686821, + "grad_norm": 12.39410793472882, + "learning_rate": 2.840909090909091e-07, + "logits": -1.431780219078064, + "logps": -82.2941665649414, + "loss": 0.4122, + "objective": 0.3948862850666046, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.42500001192092896, + "ranking_simple": 0.44999998807907104, + "regularize": 0.3948862850666046, + "step": 50 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.6914567947387695, + "eval_logits": -1.4614633321762085, + "eval_logps": -90.56139373779297, + "eval_loss": 0.4102250635623932, + "eval_objective": 0.40930914878845215, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5123966932296753, + "eval_regularize": 0.40930914878845215, + "eval_runtime": 260.1383, + "eval_samples_per_second": 22.257, + "eval_steps_per_second": 0.93, + "step": 50 + }, + { + "dpo_loss": 0.6807647943496704, + "epoch": 0.3117619272555503, + "grad_norm": 13.979478083508853, + "learning_rate": 3.1249999999999997e-07, + "logits": -1.4781759977340698, + "logps": -84.0101089477539, + "loss": 0.4052, + "objective": 0.4063163101673126, + "ranking_idealized": 0.7124999761581421, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.4063163101673126, + "step": 55 + }, + { + "dpo_loss": 0.6824926137924194, + "epoch": 0.3401039206424185, + "grad_norm": 13.484676530515722, + "learning_rate": 3.4090909090909085e-07, + "logits": -1.4679373502731323, + "logps": -83.09486389160156, + "loss": 0.3992, + "objective": 0.387731671333313, + "ranking_idealized": 0.7041666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5333333611488342, + "regularize": 0.387731671333313, + "step": 60 + }, + { + "dpo_loss": 0.6788213849067688, + "epoch": 0.3684459140292867, + "grad_norm": 13.535493104004898, + "learning_rate": 3.693181818181818e-07, + "logits": -1.4250341653823853, + "logps": -83.52283477783203, + "loss": 0.3842, + "objective": 0.3719988465309143, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5083333253860474, + "regularize": 0.3719988465309143, + "step": 65 + }, + { + "dpo_loss": 0.6763210296630859, + "epoch": 0.39678790741615494, + "grad_norm": 13.25897931133664, + "learning_rate": 3.977272727272727e-07, + "logits": -1.5077797174453735, + "logps": -85.39080047607422, + "loss": 0.3855, + "objective": 0.42043933272361755, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5291666388511658, + "regularize": 0.42043933272361755, + "step": 70 + }, + { + "dpo_loss": 0.678033709526062, + "epoch": 0.42512990080302315, + "grad_norm": 14.035157652400327, + "learning_rate": 4.2613636363636364e-07, + "logits": -1.5349814891815186, + "logps": -86.0143051147461, + "loss": 0.3945, + "objective": 0.41438591480255127, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5666666626930237, + "regularize": 0.41438591480255127, + "step": 75 + }, + { + "dpo_loss": 0.6748775243759155, + "epoch": 0.45347189418989137, + "grad_norm": 13.539091864104346, + "learning_rate": 4.545454545454545e-07, + "logits": -1.5832253694534302, + "logps": -85.59701538085938, + "loss": 0.3789, + "objective": 0.37422579526901245, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5333333611488342, + "regularize": 0.37422579526901245, + "step": 80 + }, + { + "dpo_loss": 0.6750870943069458, + "epoch": 0.4818138875767596, + "grad_norm": 12.78905385712093, + "learning_rate": 4.829545454545455e-07, + "logits": -1.5551499128341675, + "logps": -84.24475860595703, + "loss": 0.374, + "objective": 0.39821094274520874, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.39821094274520874, + "step": 85 + }, + { + "dpo_loss": 0.6703960299491882, + "epoch": 0.5101558809636277, + "grad_norm": 14.26040681218726, + "learning_rate": 4.999921328558332e-07, + "logits": -1.37662935256958, + "logps": -86.21568298339844, + "loss": 0.3761, + "objective": 0.3837045729160309, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.4833333194255829, + "regularize": 0.3837045729160309, + "step": 90 + }, + { + "dpo_loss": 0.658724844455719, + "epoch": 0.538497874350496, + "grad_norm": 14.003866252787525, + "learning_rate": 4.999036331701828e-07, + "logits": -1.4695987701416016, + "logps": -85.49458312988281, + "loss": 0.3642, + "objective": 0.39033612608909607, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5666666626930237, + "regularize": 0.39033612608909607, + "step": 95 + }, + { + "dpo_loss": 0.6546652317047119, + "epoch": 0.5668398677373642, + "grad_norm": 13.876424116810778, + "learning_rate": 4.99716834795752e-07, + "logits": -1.5616024732589722, + "logps": -86.23612213134766, + "loss": 0.374, + "objective": 0.4149954915046692, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.4149954915046692, + "step": 100 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.6882808208465576, + "eval_logits": -1.5521211624145508, + "eval_logps": -92.03614807128906, + "eval_loss": 0.4072900712490082, + "eval_objective": 0.40819329023361206, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5144628286361694, + "eval_regularize": 0.40819329023361206, + "eval_runtime": 258.9254, + "eval_samples_per_second": 22.362, + "eval_steps_per_second": 0.935, + "step": 100 + }, + { + "dpo_loss": 0.6509627103805542, + "epoch": 0.5951818611242324, + "grad_norm": 15.031011715031442, + "learning_rate": 4.994318112090048e-07, + "logits": -1.4410721063613892, + "logps": -85.85182189941406, + "loss": 0.3711, + "objective": 0.3764886260032654, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.574999988079071, + "regularize": 0.3764886260032654, + "step": 105 + }, + { + "dpo_loss": 0.6576470136642456, + "epoch": 0.6235238545111006, + "grad_norm": 16.068373019347053, + "learning_rate": 4.990486745229364e-07, + "logits": -1.6439845561981201, + "logps": -84.1036376953125, + "loss": 0.3694, + "objective": 0.39763620495796204, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.48750001192092896, + "regularize": 0.39763620495796204, + "step": 110 + }, + { + "dpo_loss": 0.6543448567390442, + "epoch": 0.6518658478979689, + "grad_norm": 15.729444594038945, + "learning_rate": 4.985675754429743e-07, + "logits": -1.6000815629959106, + "logps": -83.94436645507812, + "loss": 0.3477, + "objective": 0.3455929458141327, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5, + "regularize": 0.3455929458141327, + "step": 115 + }, + { + "dpo_loss": 0.6514815092086792, + "epoch": 0.680207841284837, + "grad_norm": 14.396427732147952, + "learning_rate": 4.979887032076988e-07, + "logits": -1.5459378957748413, + "logps": -85.23513793945312, + "loss": 0.3534, + "objective": 0.3168259561061859, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5458333492279053, + "regularize": 0.3168259561061859, + "step": 120 + }, + { + "dpo_loss": 0.6492612957954407, + "epoch": 0.7085498346717053, + "grad_norm": 16.706445645247783, + "learning_rate": 4.973122855144065e-07, + "logits": -1.5174397230148315, + "logps": -86.0051040649414, + "loss": 0.3448, + "objective": 0.3529473543167114, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.5708333253860474, + "regularize": 0.3529473543167114, + "step": 125 + }, + { + "dpo_loss": 0.65309077501297, + "epoch": 0.7368918280585735, + "grad_norm": 15.417556754357976, + "learning_rate": 4.965385884295466e-07, + "logits": -1.664696455001831, + "logps": -85.23889923095703, + "loss": 0.3464, + "objective": 0.33712950348854065, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.4958333373069763, + "regularize": 0.33712950348854065, + "step": 130 + }, + { + "dpo_loss": 0.6549941301345825, + "epoch": 0.7652338214454416, + "grad_norm": 13.59480500578719, + "learning_rate": 4.956679162840645e-07, + "logits": -1.626897931098938, + "logps": -86.90068817138672, + "loss": 0.3309, + "objective": 0.34302666783332825, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5458333492279053, + "regularize": 0.34302666783332825, + "step": 135 + }, + { + "dpo_loss": 0.6500818729400635, + "epoch": 0.7935758148323099, + "grad_norm": 14.79485288903614, + "learning_rate": 4.947006115536947e-07, + "logits": -1.523794412612915, + "logps": -86.5340576171875, + "loss": 0.3244, + "objective": 0.3356337249279022, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5166666507720947, + "regularize": 0.3356337249279022, + "step": 140 + }, + { + "dpo_loss": 0.6436840295791626, + "epoch": 0.821917808219178, + "grad_norm": 14.29078834943314, + "learning_rate": 4.936370547242482e-07, + "logits": -1.5991618633270264, + "logps": -86.87805938720703, + "loss": 0.3315, + "objective": 0.35039833188056946, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5, + "regularize": 0.35039833188056946, + "step": 145 + }, + { + "dpo_loss": 0.6453251242637634, + "epoch": 0.8502598016060463, + "grad_norm": 14.662823673975787, + "learning_rate": 4.924776641419512e-07, + "logits": -1.5607432126998901, + "logps": -84.04727935791016, + "loss": 0.3231, + "objective": 0.32859519124031067, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5833333134651184, + "regularize": 0.32859519124031067, + "step": 150 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.6880838871002197, + "eval_logits": -1.6073634624481201, + "eval_logps": -92.8072509765625, + "eval_loss": 0.40292537212371826, + "eval_objective": 0.4087039530277252, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5185950398445129, + "eval_regularize": 0.4087039530277252, + "eval_runtime": 258.55, + "eval_samples_per_second": 22.394, + "eval_steps_per_second": 0.936, + "step": 150 + }, + { + "dpo_loss": 0.6386777758598328, + "epoch": 0.8786017949929145, + "grad_norm": 14.231064502358223, + "learning_rate": 4.912228958488892e-07, + "logits": -1.5854390859603882, + "logps": -84.10832214355469, + "loss": 0.3257, + "objective": 0.3301841616630554, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5916666388511658, + "regularize": 0.3301841616630554, + "step": 155 + }, + { + "dpo_loss": 0.635101854801178, + "epoch": 0.9069437883797827, + "grad_norm": 16.2268120086952, + "learning_rate": 4.898732434036243e-07, + "logits": -1.4904930591583252, + "logps": -86.09799194335938, + "loss": 0.3107, + "objective": 0.32787373661994934, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5458333492279053, + "regularize": 0.32787373661994934, + "step": 160 + }, + { + "dpo_loss": 0.632634162902832, + "epoch": 0.9352857817666509, + "grad_norm": 16.041101199008867, + "learning_rate": 4.884292376870567e-07, + "logits": -1.5242409706115723, + "logps": -86.48987579345703, + "loss": 0.3212, + "objective": 0.3137226700782776, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.574999988079071, + "regularize": 0.3137226700782776, + "step": 165 + }, + { + "dpo_loss": 0.6288425922393799, + "epoch": 0.9636277751535192, + "grad_norm": 16.89173365453321, + "learning_rate": 4.868914466936037e-07, + "logits": -1.5360677242279053, + "logps": -86.72618103027344, + "loss": 0.3151, + "objective": 0.30297866463661194, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5708333253860474, + "regularize": 0.30297866463661194, + "step": 170 + }, + { + "dpo_loss": 0.6335378289222717, + "epoch": 0.9919697685403873, + "grad_norm": 15.347989877166441, + "learning_rate": 4.852604753077817e-07, + "logits": -1.4790997505187988, + "logps": -87.8569107055664, + "loss": 0.3103, + "objective": 0.29884618520736694, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6166666746139526, + "regularize": 0.29884618520736694, + "step": 175 + }, + { + "dpo_loss": 0.6220327615737915, + "epoch": 1.0203117619272555, + "grad_norm": 15.022798279596007, + "learning_rate": 4.835369650662767e-07, + "logits": -1.6809762716293335, + "logps": -87.00578308105469, + "loss": 0.2902, + "objective": 0.3023075461387634, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5874999761581421, + "regularize": 0.3023075461387634, + "step": 180 + }, + { + "dpo_loss": 0.6156979203224182, + "epoch": 1.0486537553141237, + "grad_norm": 17.673807880039096, + "learning_rate": 4.817215939055985e-07, + "logits": -1.54806387424469, + "logps": -86.16964721679688, + "loss": 0.2856, + "objective": 0.30200377106666565, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5416666865348816, + "regularize": 0.30200377106666565, + "step": 185 + }, + { + "dpo_loss": 0.6105552315711975, + "epoch": 1.076995748700992, + "grad_norm": 16.47130075175902, + "learning_rate": 4.798150758954164e-07, + "logits": -1.6065795421600342, + "logps": -88.57856750488281, + "loss": 0.2661, + "objective": 0.23887412250041962, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6041666865348816, + "regularize": 0.23887412250041962, + "step": 190 + }, + { + "dpo_loss": 0.6094806790351868, + "epoch": 1.10533774208786, + "grad_norm": 15.979183042956787, + "learning_rate": 4.778181609576831e-07, + "logits": -1.58108651638031, + "logps": -86.33049011230469, + "loss": 0.2734, + "objective": 0.23615716397762299, + "ranking_idealized": 0.7041666507720947, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6333333253860474, + "regularize": 0.23615716397762299, + "step": 195 + }, + { + "dpo_loss": 0.6135362982749939, + "epoch": 1.1336797354747283, + "grad_norm": 15.241345178579065, + "learning_rate": 4.757316345716553e-07, + "logits": -1.6668376922607422, + "logps": -85.64834594726562, + "loss": 0.267, + "objective": 0.2661064565181732, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5541666746139526, + "regularize": 0.2661064565181732, + "step": 200 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.6866354942321777, + "eval_logits": -1.6423935890197754, + "eval_logps": -94.7991943359375, + "eval_loss": 0.4068562686443329, + "eval_objective": 0.41099515557289124, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5185950398445129, + "eval_regularize": 0.41099515557289124, + "eval_runtime": 258.6603, + "eval_samples_per_second": 22.385, + "eval_steps_per_second": 0.936, + "step": 200 + }, + { + "dpo_loss": 0.6104326844215393, + "epoch": 1.1620217288615966, + "grad_norm": 16.09148449696529, + "learning_rate": 4.735563174649278e-07, + "logits": -1.6373622417449951, + "logps": -88.22838592529297, + "loss": 0.2748, + "objective": 0.2591724991798401, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5541666746139526, + "regularize": 0.2591724991798401, + "step": 205 + }, + { + "dpo_loss": 0.6027090549468994, + "epoch": 1.1903637222484649, + "grad_norm": 16.78316844909737, + "learning_rate": 4.7129306529060407e-07, + "logits": -1.604967474937439, + "logps": -88.00846099853516, + "loss": 0.2647, + "objective": 0.28820380568504333, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5541666746139526, + "regularize": 0.28820380568504333, + "step": 210 + }, + { + "dpo_loss": 0.6120165586471558, + "epoch": 1.2187057156353331, + "grad_norm": 17.38824297135803, + "learning_rate": 4.6894276829072786e-07, + "logits": -1.577252745628357, + "logps": -88.0232925415039, + "loss": 0.2457, + "objective": 0.2474772185087204, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5625, + "regularize": 0.2474772185087204, + "step": 215 + }, + { + "dpo_loss": 0.6058060526847839, + "epoch": 1.2470477090222012, + "grad_norm": 16.33918535416867, + "learning_rate": 4.6650635094610966e-07, + "logits": -1.5445390939712524, + "logps": -87.7970199584961, + "loss": 0.25, + "objective": 0.23831520974636078, + "ranking_idealized": 0.7208333611488342, + "ranking_idealized_expo": 0.6041666865348816, + "ranking_simple": 0.637499988079071, + "regularize": 0.23831520974636078, + "step": 220 + }, + { + "dpo_loss": 0.6021844744682312, + "epoch": 1.2753897024090695, + "grad_norm": 14.989005292751132, + "learning_rate": 4.639847716126854e-07, + "logits": -1.6192957162857056, + "logps": -89.04407501220703, + "loss": 0.2607, + "objective": 0.26420968770980835, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6041666865348816, + "regularize": 0.26420965790748596, + "step": 225 + }, + { + "dpo_loss": 0.6108235120773315, + "epoch": 1.3037316957959377, + "grad_norm": 16.245321246774985, + "learning_rate": 4.6137902214455106e-07, + "logits": -1.5698676109313965, + "logps": -89.06554412841797, + "loss": 0.2454, + "objective": 0.24457047879695892, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5541666746139526, + "regularize": 0.24457047879695892, + "step": 230 + }, + { + "dpo_loss": 0.59207683801651, + "epoch": 1.3320736891828058, + "grad_norm": 16.42864016636988, + "learning_rate": 4.5869012750382004e-07, + "logits": -1.6616859436035156, + "logps": -87.82197570800781, + "loss": 0.2583, + "objective": 0.26300859451293945, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6041666865348816, + "regularize": 0.26300859451293945, + "step": 235 + }, + { + "dpo_loss": 0.5991641283035278, + "epoch": 1.360415682569674, + "grad_norm": 17.146089761318706, + "learning_rate": 4.5591914535745817e-07, + "logits": -1.5948702096939087, + "logps": -89.31143188476562, + "loss": 0.2442, + "objective": 0.25130581855773926, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5666666626930237, + "regularize": 0.25130581855773926, + "step": 240 + }, + { + "dpo_loss": 0.6016849279403687, + "epoch": 1.3887576759565423, + "grad_norm": 14.997203138603757, + "learning_rate": 4.5306716566125433e-07, + "logits": -1.6367671489715576, + "logps": -88.17431640625, + "loss": 0.2399, + "objective": 0.23935823142528534, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5833333134651184, + "regularize": 0.23935823142528534, + "step": 245 + }, + { + "dpo_loss": 0.5918813943862915, + "epoch": 1.4170996693434104, + "grad_norm": 15.668454928081044, + "learning_rate": 4.501353102310901e-07, + "logits": -1.5877238512039185, + "logps": -87.66322326660156, + "loss": 0.2432, + "objective": 0.2531537711620331, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5625, + "regularize": 0.2531537711620331, + "step": 250 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6876620650291443, + "eval_logits": -1.6720653772354126, + "eval_logps": -96.13894653320312, + "eval_loss": 0.4107522666454315, + "eval_objective": 0.4137335419654846, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.51962810754776, + "eval_regularize": 0.4137335419654846, + "eval_runtime": 259.3309, + "eval_samples_per_second": 22.327, + "eval_steps_per_second": 0.933, + "step": 250 + }, + { + "dpo_loss": 0.5952737927436829, + "epoch": 1.4454416627302786, + "grad_norm": 16.40280338029817, + "learning_rate": 4.471247323016777e-07, + "logits": -1.5863794088363647, + "logps": -89.24433898925781, + "loss": 0.2442, + "objective": 0.2290959656238556, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6291666626930237, + "regularize": 0.2290959656238556, + "step": 255 + }, + { + "dpo_loss": 0.5954132080078125, + "epoch": 1.473783656117147, + "grad_norm": 16.11674277744465, + "learning_rate": 4.440366160729392e-07, + "logits": -1.6588572263717651, + "logps": -89.44280242919922, + "loss": 0.2443, + "objective": 0.2354036122560501, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5625, + "regularize": 0.2354036122560501, + "step": 260 + }, + { + "dpo_loss": 0.5868396759033203, + "epoch": 1.5021256495040152, + "grad_norm": 16.753008834337265, + "learning_rate": 4.4087217624420585e-07, + "logits": -1.6106855869293213, + "logps": -88.14371490478516, + "loss": 0.239, + "objective": 0.24956756830215454, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5958333611488342, + "regularize": 0.24956756830215454, + "step": 265 + }, + { + "dpo_loss": 0.5915893316268921, + "epoch": 1.5304676428908834, + "grad_norm": 17.579129679111187, + "learning_rate": 4.3763265753642055e-07, + "logits": -1.6173158884048462, + "logps": -90.8720703125, + "loss": 0.2421, + "objective": 0.24301743507385254, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5958333611488342, + "regularize": 0.24301742017269135, + "step": 270 + }, + { + "dpo_loss": 0.5958731770515442, + "epoch": 1.5588096362777515, + "grad_norm": 16.101798479127662, + "learning_rate": 4.34319334202531e-07, + "logits": -1.6187034845352173, + "logps": -90.12999725341797, + "loss": 0.2448, + "objective": 0.22899790108203888, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6208333373069763, + "regularize": 0.22899790108203888, + "step": 275 + }, + { + "dpo_loss": 0.5842003226280212, + "epoch": 1.5871516296646198, + "grad_norm": 15.709789047148108, + "learning_rate": 4.309335095262675e-07, + "logits": -1.5244942903518677, + "logps": -88.604248046875, + "loss": 0.2411, + "objective": 0.23850402235984802, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6000000238418579, + "regularize": 0.23850402235984802, + "step": 280 + }, + { + "dpo_loss": 0.5853084921836853, + "epoch": 1.615493623051488, + "grad_norm": 16.6854633771705, + "learning_rate": 4.274765153095007e-07, + "logits": -1.6502856016159058, + "logps": -89.77727508544922, + "loss": 0.2219, + "objective": 0.21514521539211273, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6041666865348816, + "regularize": 0.21514521539211273, + "step": 285 + }, + { + "dpo_loss": 0.5910848379135132, + "epoch": 1.643835616438356, + "grad_norm": 18.978761606300836, + "learning_rate": 4.239497113483819e-07, + "logits": -1.7089149951934814, + "logps": -86.87386322021484, + "loss": 0.2312, + "objective": 0.23006680607795715, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6291666626930237, + "regularize": 0.23006680607795715, + "step": 290 + }, + { + "dpo_loss": 0.5802692174911499, + "epoch": 1.6721776098252243, + "grad_norm": 16.652074965539576, + "learning_rate": 4.203544848984728e-07, + "logits": -1.5955086946487427, + "logps": -86.49956512451172, + "loss": 0.2276, + "objective": 0.23742005228996277, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5791666507720947, + "regularize": 0.23742005228996277, + "step": 295 + }, + { + "dpo_loss": 0.5916833281517029, + "epoch": 1.7005196032120926, + "grad_norm": 16.884463449554712, + "learning_rate": 4.166922501290729e-07, + "logits": -1.6546835899353027, + "logps": -88.2989730834961, + "loss": 0.2252, + "objective": 0.26394858956336975, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6208333373069763, + "regularize": 0.26394858956336975, + "step": 300 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6866207718849182, + "eval_logits": -1.6648496389389038, + "eval_logps": -95.62443542480469, + "eval_loss": 0.410134494304657, + "eval_objective": 0.4137687385082245, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5216942429542542, + "eval_regularize": 0.4137687385082245, + "eval_runtime": 259.4458, + "eval_samples_per_second": 22.317, + "eval_steps_per_second": 0.933, + "step": 300 + }, + { + "dpo_loss": 0.5952399373054504, + "epoch": 1.7288615965989607, + "grad_norm": 16.24562342201146, + "learning_rate": 4.129644475669616e-07, + "logits": -1.6116312742233276, + "logps": -88.82595825195312, + "loss": 0.218, + "objective": 0.2242499738931656, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5833333134651184, + "regularize": 0.2242499738931656, + "step": 305 + }, + { + "dpo_loss": 0.583368182182312, + "epoch": 1.7572035899858292, + "grad_norm": 18.099666352463437, + "learning_rate": 4.0917254352977206e-07, + "logits": -1.7004183530807495, + "logps": -87.11441040039062, + "loss": 0.2283, + "objective": 0.2325660139322281, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.612500011920929, + "regularize": 0.2325660139322281, + "step": 310 + }, + { + "dpo_loss": 0.5933206677436829, + "epoch": 1.7855455833726972, + "grad_norm": 16.545516113765466, + "learning_rate": 4.053180295492202e-07, + "logits": -1.602583408355713, + "logps": -88.69900512695312, + "loss": 0.2287, + "objective": 0.21895338594913483, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.5833333134651184, + "regularize": 0.21895337104797363, + "step": 315 + }, + { + "dpo_loss": 0.5876157283782959, + "epoch": 1.8138875767595655, + "grad_norm": 18.487916312721516, + "learning_rate": 4.0140242178441665e-07, + "logits": -1.6777514219284058, + "logps": -90.22407531738281, + "loss": 0.2153, + "objective": 0.20208925008773804, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5874999761581421, + "regularize": 0.20208925008773804, + "step": 320 + }, + { + "dpo_loss": 0.5746586918830872, + "epoch": 1.8422295701464337, + "grad_norm": 18.670087833334332, + "learning_rate": 3.9742726042549053e-07, + "logits": -1.7464016675949097, + "logps": -91.97502899169922, + "loss": 0.219, + "objective": 0.2114688903093338, + "ranking_idealized": 0.7166666388511658, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6416666507720947, + "regularize": 0.2114688903093338, + "step": 325 + }, + { + "dpo_loss": 0.5767069458961487, + "epoch": 1.8705715635333018, + "grad_norm": 18.91235181922618, + "learning_rate": 3.933941090877615e-07, + "logits": -1.466091275215149, + "logps": -90.11954498291016, + "loss": 0.219, + "objective": 0.2171897292137146, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.574999988079071, + "regularize": 0.2171897292137146, + "step": 330 + }, + { + "dpo_loss": 0.5894278287887573, + "epoch": 1.89891355692017, + "grad_norm": 15.592599296406116, + "learning_rate": 3.8930455419669744e-07, + "logits": -1.6301844120025635, + "logps": -89.44200134277344, + "loss": 0.2112, + "objective": 0.18907961249351501, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6000000238418579, + "regularize": 0.18907961249351501, + "step": 335 + }, + { + "dpo_loss": 0.5853725075721741, + "epoch": 1.9272555503070383, + "grad_norm": 15.83418724261755, + "learning_rate": 3.851602043638994e-07, + "logits": -1.660121202468872, + "logps": -91.48560333251953, + "loss": 0.2026, + "objective": 0.18658672273159027, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6416666507720947, + "regularize": 0.18658672273159027, + "step": 340 + }, + { + "dpo_loss": 0.5825453400611877, + "epoch": 1.9555975436939064, + "grad_norm": 16.366640560133238, + "learning_rate": 3.809626897543604e-07, + "logits": -1.657557725906372, + "logps": -90.72650909423828, + "loss": 0.1961, + "objective": 0.18415075540542603, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6166666746139526, + "regularize": 0.18415075540542603, + "step": 345 + }, + { + "dpo_loss": 0.5838915109634399, + "epoch": 1.9839395370807746, + "grad_norm": 17.651439137685784, + "learning_rate": 3.7671366144524576e-07, + "logits": -1.551125407218933, + "logps": -91.74525451660156, + "loss": 0.2082, + "objective": 0.20508398115634918, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.5541666746139526, + "regularize": 0.20508398115634918, + "step": 350 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6863144040107727, + "eval_logits": -1.6988588571548462, + "eval_logps": -97.52546691894531, + "eval_loss": 0.4102429747581482, + "eval_objective": 0.4131539762020111, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.51962810754776, + "eval_regularize": 0.4131539762020111, + "eval_runtime": 258.5165, + "eval_samples_per_second": 22.397, + "eval_steps_per_second": 0.936, + "step": 350 + }, + { + "dpo_loss": 0.5745717287063599, + "epoch": 2.012281530467643, + "grad_norm": 15.784086525377202, + "learning_rate": 3.724147907764478e-07, + "logits": -1.5323989391326904, + "logps": -90.18486785888672, + "loss": 0.2055, + "objective": 0.20713359117507935, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6458333134651184, + "regularize": 0.20713359117507935, + "step": 355 + }, + { + "dpo_loss": 0.5675494074821472, + "epoch": 2.040623523854511, + "grad_norm": 20.55210866626824, + "learning_rate": 3.6806776869317067e-07, + "logits": -1.6239458322525024, + "logps": -89.69377899169922, + "loss": 0.1726, + "objective": 0.17787505686283112, + "ranking_idealized": 0.7250000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.675000011920929, + "regularize": 0.17787505686283112, + "step": 360 + }, + { + "dpo_loss": 0.5650666952133179, + "epoch": 2.0689655172413794, + "grad_norm": 16.544231581396616, + "learning_rate": 3.636743050808028e-07, + "logits": -1.6872822046279907, + "logps": -91.26659393310547, + "loss": 0.1866, + "objective": 0.16895455121994019, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6333333253860474, + "regularize": 0.16895455121994019, + "step": 365 + }, + { + "dpo_loss": 0.575705349445343, + "epoch": 2.0973075106282475, + "grad_norm": 16.758890304778106, + "learning_rate": 3.5923612809233984e-07, + "logits": -1.662663221359253, + "logps": -87.82825469970703, + "loss": 0.1679, + "objective": 0.1752353459596634, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.574999988079071, + "regularize": 0.1752353310585022, + "step": 370 + }, + { + "dpo_loss": 0.5633853077888489, + "epoch": 2.1256495040151155, + "grad_norm": 16.99783941953761, + "learning_rate": 3.5475498346862214e-07, + "logits": -1.6271302700042725, + "logps": -91.13916015625, + "loss": 0.1726, + "objective": 0.16911908984184265, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.6791666746139526, + "regularize": 0.16911907494068146, + "step": 375 + }, + { + "dpo_loss": 0.5752108097076416, + "epoch": 2.153991497401984, + "grad_norm": 17.23340187781712, + "learning_rate": 3.502326338516534e-07, + "logits": -1.5394021272659302, + "logps": -89.99533081054688, + "loss": 0.179, + "objective": 0.1650255024433136, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6000000238418579, + "regularize": 0.1650255024433136, + "step": 380 + }, + { + "dpo_loss": 0.571977972984314, + "epoch": 2.182333490788852, + "grad_norm": 15.78796183229778, + "learning_rate": 3.4567085809127245e-07, + "logits": -1.6716177463531494, + "logps": -91.3305892944336, + "loss": 0.1653, + "objective": 0.13291777670383453, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.612500011920929, + "regularize": 0.13291777670383453, + "step": 385 + }, + { + "dpo_loss": 0.5752423405647278, + "epoch": 2.21067548417572, + "grad_norm": 18.129151048308177, + "learning_rate": 3.4107145054544855e-07, + "logits": -1.5358682870864868, + "logps": -91.15263366699219, + "loss": 0.1744, + "objective": 0.16379062831401825, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5625, + "regularize": 0.16379062831401825, + "step": 390 + }, + { + "dpo_loss": 0.5485681891441345, + "epoch": 2.2390174775625886, + "grad_norm": 16.313781937896024, + "learning_rate": 3.3643622037447767e-07, + "logits": -1.5593619346618652, + "logps": -92.42921447753906, + "loss": 0.1776, + "objective": 0.1637614667415619, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6416666507720947, + "regularize": 0.1637614667415619, + "step": 395 + }, + { + "dpo_loss": 0.5597947239875793, + "epoch": 2.2673594709494567, + "grad_norm": 16.659127876259, + "learning_rate": 3.317669908293554e-07, + "logits": -1.631813645362854, + "logps": -92.92410278320312, + "loss": 0.1825, + "objective": 0.196553573012352, + "ranking_idealized": 0.7583333253860474, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.6958333253860474, + "regularize": 0.1965535581111908, + "step": 400 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6862595677375793, + "eval_logits": -1.6931663751602173, + "eval_logps": -97.79962158203125, + "eval_loss": 0.4124037027359009, + "eval_objective": 0.4144473969936371, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.4144473969936371, + "eval_runtime": 258.4529, + "eval_samples_per_second": 22.403, + "eval_steps_per_second": 0.936, + "step": 400 + }, + { + "dpo_loss": 0.5691500902175903, + "epoch": 2.295701464336325, + "grad_norm": 17.30117286858182, + "learning_rate": 3.270655985346081e-07, + "logits": -1.7139372825622559, + "logps": -89.78938293457031, + "loss": 0.1717, + "objective": 0.18224166333675385, + "ranking_idealized": 0.737500011920929, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6458333134651184, + "regularize": 0.18224166333675385, + "step": 405 + }, + { + "dpo_loss": 0.5579439997673035, + "epoch": 2.324043457723193, + "grad_norm": 17.123218301010457, + "learning_rate": 3.223338927658632e-07, + "logits": -1.5741162300109863, + "logps": -91.07009887695312, + "loss": 0.1618, + "objective": 0.15759395062923431, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5958333611488342, + "regularize": 0.15759395062923431, + "step": 410 + }, + { + "dpo_loss": 0.5704253315925598, + "epoch": 2.3523854511100613, + "grad_norm": 17.68931154440285, + "learning_rate": 3.175737347224432e-07, + "logits": -1.6476367712020874, + "logps": -91.30075073242188, + "loss": 0.1732, + "objective": 0.19281157851219177, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.612500011920929, + "regularize": 0.19281157851219177, + "step": 415 + }, + { + "dpo_loss": 0.5771389603614807, + "epoch": 2.3807274444969297, + "grad_norm": 17.274582557860825, + "learning_rate": 3.1278699679526975e-07, + "logits": -1.5415838956832886, + "logps": -92.63572692871094, + "loss": 0.1579, + "objective": 0.15308959782123566, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.5958333611488342, + "regularize": 0.15308959782123566, + "step": 420 + }, + { + "dpo_loss": 0.566936731338501, + "epoch": 2.409069437883798, + "grad_norm": 16.445557447346342, + "learning_rate": 3.0797556183036575e-07, + "logits": -1.5967096090316772, + "logps": -91.4622802734375, + "loss": 0.1607, + "objective": 0.16068215668201447, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6000000238418579, + "regularize": 0.16068214178085327, + "step": 425 + }, + { + "dpo_loss": 0.5632474422454834, + "epoch": 2.4374114312706663, + "grad_norm": 15.962055488306607, + "learning_rate": 3.0314132238824415e-07, + "logits": -1.6247813701629639, + "logps": -92.1604995727539, + "loss": 0.1547, + "objective": 0.1360505074262619, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.625, + "regularize": 0.1360505074262619, + "step": 430 + }, + { + "dpo_loss": 0.566851019859314, + "epoch": 2.4657534246575343, + "grad_norm": 16.006081940650837, + "learning_rate": 2.982861799994764e-07, + "logits": -1.6544443368911743, + "logps": -92.63692474365234, + "loss": 0.1637, + "objective": 0.17756709456443787, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6208333373069763, + "regularize": 0.17756709456443787, + "step": 435 + }, + { + "dpo_loss": 0.5565729141235352, + "epoch": 2.4940954180444024, + "grad_norm": 17.37344722468487, + "learning_rate": 2.934120444167326e-07, + "logits": -1.5883994102478027, + "logps": -91.88066101074219, + "loss": 0.159, + "objective": 0.15150482952594757, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6041666865348816, + "regularize": 0.15150482952594757, + "step": 440 + }, + { + "dpo_loss": 0.565682590007782, + "epoch": 2.5224374114312704, + "grad_norm": 18.453788667979182, + "learning_rate": 2.885208328635864e-07, + "logits": -1.6123565435409546, + "logps": -89.5006332397461, + "loss": 0.1576, + "objective": 0.1587233543395996, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6458333134651184, + "regularize": 0.1587233543395996, + "step": 445 + }, + { + "dpo_loss": 0.5824019312858582, + "epoch": 2.550779404818139, + "grad_norm": 19.424550718198045, + "learning_rate": 2.83614469280383e-07, + "logits": -1.6537593603134155, + "logps": -91.4095230102539, + "loss": 0.1504, + "objective": 0.15120406448841095, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5791666507720947, + "regularize": 0.15120406448841095, + "step": 450 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.6864377856254578, + "eval_logits": -1.711348056793213, + "eval_logps": -99.202880859375, + "eval_loss": 0.41492125391960144, + "eval_objective": 0.4176488518714905, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5216942429542542, + "eval_regularize": 0.4176488518714905, + "eval_runtime": 258.9375, + "eval_samples_per_second": 22.361, + "eval_steps_per_second": 0.935, + "step": 450 + }, + { + "dpo_loss": 0.5587320923805237, + "epoch": 2.579121398205007, + "grad_norm": 18.174711126742732, + "learning_rate": 2.786948835674634e-07, + "logits": -1.6923545598983765, + "logps": -92.0631103515625, + "loss": 0.1514, + "objective": 0.15467478334903717, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6416666507720947, + "regularize": 0.15467478334903717, + "step": 455 + }, + { + "dpo_loss": 0.5529297590255737, + "epoch": 2.6074633915918755, + "grad_norm": 18.378396938924546, + "learning_rate": 2.737640108260456e-07, + "logits": -1.765284776687622, + "logps": -92.5921401977539, + "loss": 0.1544, + "objective": 0.13981758058071136, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6291666626930237, + "regularize": 0.13981756567955017, + "step": 460 + }, + { + "dpo_loss": 0.5604754090309143, + "epoch": 2.6358053849787435, + "grad_norm": 17.16312208138119, + "learning_rate": 2.6882379059705953e-07, + "logits": -1.6412590742111206, + "logps": -91.83204650878906, + "loss": 0.1571, + "objective": 0.15992027521133423, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5791666507720947, + "regularize": 0.15992026031017303, + "step": 465 + }, + { + "dpo_loss": 0.5741956830024719, + "epoch": 2.6641473783656116, + "grad_norm": 17.444271577746782, + "learning_rate": 2.6387616609823504e-07, + "logits": -1.6750518083572388, + "logps": -91.33477020263672, + "loss": 0.151, + "objective": 0.17329135537147522, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.625, + "regularize": 0.17329135537147522, + "step": 470 + }, + { + "dpo_loss": 0.5746079087257385, + "epoch": 2.69248937175248, + "grad_norm": 17.607595627923466, + "learning_rate": 2.5892308345974514e-07, + "logits": -1.6217347383499146, + "logps": -90.19564819335938, + "loss": 0.1521, + "objective": 0.1534017026424408, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6166666746139526, + "regularize": 0.1534017026424408, + "step": 475 + }, + { + "dpo_loss": 0.56805020570755, + "epoch": 2.720831365139348, + "grad_norm": 18.441983400540806, + "learning_rate": 2.53966490958702e-07, + "logits": -1.7197903394699097, + "logps": -90.20177459716797, + "loss": 0.148, + "objective": 0.14620445668697357, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6291666626930237, + "regularize": 0.14620442688465118, + "step": 480 + }, + { + "dpo_loss": 0.5559974312782288, + "epoch": 2.7491733585262166, + "grad_norm": 17.00220355810742, + "learning_rate": 2.4900833825280967e-07, + "logits": -1.628369927406311, + "logps": -93.048828125, + "loss": 0.1488, + "objective": 0.1451708972454071, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6583333611488342, + "regularize": 0.1451708972454071, + "step": 485 + }, + { + "dpo_loss": 0.555105984210968, + "epoch": 2.7775153519130846, + "grad_norm": 17.798810379621077, + "learning_rate": 2.4405057561347313e-07, + "logits": -1.647185206413269, + "logps": -90.4990463256836, + "loss": 0.1613, + "objective": 0.17200501263141632, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.6208333373069763, + "regularize": 0.17200501263141632, + "step": 490 + }, + { + "dpo_loss": 0.5594576001167297, + "epoch": 2.8058573452999527, + "grad_norm": 18.076540126591944, + "learning_rate": 2.39095153158666e-07, + "logits": -1.6548616886138916, + "logps": -90.19225311279297, + "loss": 0.1504, + "objective": 0.1365150660276413, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.6208333373069763, + "regularize": 0.1365150511264801, + "step": 495 + }, + { + "dpo_loss": 0.5552747845649719, + "epoch": 2.8341993386868207, + "grad_norm": 17.278782223651127, + "learning_rate": 2.3414402008585886e-07, + "logits": -1.6857832670211792, + "logps": -89.0853500366211, + "loss": 0.1494, + "objective": 0.15246258676052094, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6416666507720947, + "regularize": 0.15246258676052094, + "step": 500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.6861580014228821, + "eval_logits": -1.7174702882766724, + "eval_logps": -99.17545318603516, + "eval_loss": 0.41525644063949585, + "eval_objective": 0.4182237386703491, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5227272510528564, + "eval_regularize": 0.4182237386703491, + "eval_runtime": 259.2438, + "eval_samples_per_second": 22.334, + "eval_steps_per_second": 0.933, + "step": 500 + }, + { + "dpo_loss": 0.5660989284515381, + "epoch": 2.862541332073689, + "grad_norm": 18.182680782212074, + "learning_rate": 2.2919912390530943e-07, + "logits": -1.6143929958343506, + "logps": -91.0888900756836, + "loss": 0.1437, + "objective": 0.16082407534122467, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6333333253860474, + "regularize": 0.16082406044006348, + "step": 505 + }, + { + "dpo_loss": 0.5675150752067566, + "epoch": 2.8908833254605573, + "grad_norm": 16.373132303441977, + "learning_rate": 2.2426240967401638e-07, + "logits": -1.5807684659957886, + "logps": -91.39689636230469, + "loss": 0.1433, + "objective": 0.1494457870721817, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6291666626930237, + "regularize": 0.1494457870721817, + "step": 510 + }, + { + "dpo_loss": 0.5627566576004028, + "epoch": 2.9192253188474258, + "grad_norm": 18.008132213394468, + "learning_rate": 2.1933581923063837e-07, + "logits": -1.7557440996170044, + "logps": -91.32353210449219, + "loss": 0.1448, + "objective": 0.13260915875434875, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.6291666626930237, + "regularize": 0.13260914385318756, + "step": 515 + }, + { + "dpo_loss": 0.5646940469741821, + "epoch": 2.947567312234294, + "grad_norm": 17.30767973762921, + "learning_rate": 2.1442129043167873e-07, + "logits": -1.610668420791626, + "logps": -92.7865219116211, + "loss": 0.1368, + "objective": 0.11772733181715012, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6291666626930237, + "regularize": 0.11772733181715012, + "step": 520 + }, + { + "dpo_loss": 0.5658089518547058, + "epoch": 2.975909305621162, + "grad_norm": 18.116492800551395, + "learning_rate": 2.0952075638923652e-07, + "logits": -1.6272333860397339, + "logps": -92.43870544433594, + "loss": 0.1424, + "objective": 0.15236981213092804, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6416666507720947, + "regularize": 0.15236981213092804, + "step": 525 + }, + { + "dpo_loss": 0.5534684658050537, + "epoch": 3.0042512990080303, + "grad_norm": 18.337044286762765, + "learning_rate": 2.0463614471062435e-07, + "logits": -1.6210473775863647, + "logps": -91.47294616699219, + "loss": 0.1502, + "objective": 0.17477649450302124, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6291666626930237, + "regularize": 0.17477649450302124, + "step": 530 + }, + { + "dpo_loss": 0.5659457445144653, + "epoch": 3.0325932923948984, + "grad_norm": 16.444884726429134, + "learning_rate": 1.9976937674015026e-07, + "logits": -1.6844907999038696, + "logps": -93.2222671508789, + "loss": 0.1284, + "objective": 0.14268328249454498, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6583333611488342, + "regularize": 0.14268328249454498, + "step": 535 + }, + { + "dpo_loss": 0.5521051287651062, + "epoch": 3.0609352857817664, + "grad_norm": 19.963683437356444, + "learning_rate": 1.9492236680336483e-07, + "logits": -1.7760847806930542, + "logps": -90.89082336425781, + "loss": 0.1216, + "objective": 0.10329335182905197, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6541666388511658, + "regularize": 0.10329335182905197, + "step": 540 + }, + { + "dpo_loss": 0.5619763731956482, + "epoch": 3.089277279168635, + "grad_norm": 17.450130382767895, + "learning_rate": 1.9009702145406724e-07, + "logits": -1.6995065212249756, + "logps": -92.40625, + "loss": 0.1232, + "objective": 0.1230437308549881, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.6000000238418579, + "regularize": 0.1230437308549881, + "step": 545 + }, + { + "dpo_loss": 0.5528106689453125, + "epoch": 3.117619272555503, + "grad_norm": 18.245098236126562, + "learning_rate": 1.8529523872436977e-07, + "logits": -1.5086556673049927, + "logps": -92.30103302001953, + "loss": 0.1407, + "objective": 0.12957319617271423, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6791666746139526, + "regularize": 0.12957318127155304, + "step": 550 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 0.6856257915496826, + "eval_logits": -1.7183054685592651, + "eval_logps": -99.2997055053711, + "eval_loss": 0.4161340296268463, + "eval_objective": 0.41743505001068115, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5216942429542542, + "eval_regularize": 0.41743505001068115, + "eval_runtime": 258.7783, + "eval_samples_per_second": 22.374, + "eval_steps_per_second": 0.935, + "step": 550 + }, + { + "dpo_loss": 0.5473430752754211, + "epoch": 3.1459612659423715, + "grad_norm": 18.87722095427309, + "learning_rate": 1.8051890737811393e-07, + "logits": -1.6218358278274536, + "logps": -93.05738067626953, + "loss": 0.1336, + "objective": 0.1305130124092102, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6625000238418579, + "regularize": 0.1305130124092102, + "step": 555 + }, + { + "dpo_loss": 0.5478367209434509, + "epoch": 3.1743032593292395, + "grad_norm": 22.714698597290123, + "learning_rate": 1.7576990616793137e-07, + "logits": -1.601859211921692, + "logps": -90.21554565429688, + "loss": 0.1212, + "objective": 0.10795855522155762, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.637499988079071, + "regularize": 0.10795855522155762, + "step": 560 + }, + { + "dpo_loss": 0.5566601157188416, + "epoch": 3.2026452527161076, + "grad_norm": 24.322678833478967, + "learning_rate": 1.710501030962438e-07, + "logits": -1.663177728652954, + "logps": -91.7726058959961, + "loss": 0.1298, + "objective": 0.13216590881347656, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6458333134651184, + "regularize": 0.13216587901115417, + "step": 565 + }, + { + "dpo_loss": 0.5519458055496216, + "epoch": 3.230987246102976, + "grad_norm": 19.102063233264193, + "learning_rate": 1.663613546804912e-07, + "logits": -1.5763607025146484, + "logps": -91.98208618164062, + "loss": 0.1293, + "objective": 0.13738204538822174, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5916666388511658, + "regularize": 0.13738203048706055, + "step": 570 + }, + { + "dpo_loss": 0.5559364557266235, + "epoch": 3.259329239489844, + "grad_norm": 16.15481429380041, + "learning_rate": 1.617055052228768e-07, + "logits": -1.6705526113510132, + "logps": -92.17435455322266, + "loss": 0.1266, + "objective": 0.12801046669483185, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6499999761581421, + "regularize": 0.12801046669483185, + "step": 575 + }, + { + "dpo_loss": 0.5649384260177612, + "epoch": 3.287671232876712, + "grad_norm": 17.44743081337015, + "learning_rate": 1.5708438608491815e-07, + "logits": -1.6591442823410034, + "logps": -93.50952911376953, + "loss": 0.1277, + "objective": 0.11801984906196594, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6291666626930237, + "regularize": 0.11801984906196594, + "step": 580 + }, + { + "dpo_loss": 0.5595548152923584, + "epoch": 3.3160132262635806, + "grad_norm": 16.72082331684023, + "learning_rate": 1.524998149670871e-07, + "logits": -1.69523286819458, + "logps": -93.74117279052734, + "loss": 0.12, + "objective": 0.10769928246736526, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6583333611488342, + "regularize": 0.10769927501678467, + "step": 585 + }, + { + "dpo_loss": 0.561581015586853, + "epoch": 3.3443552196504487, + "grad_norm": 19.465809423510365, + "learning_rate": 1.479535951938243e-07, + "logits": -1.7049933671951294, + "logps": -93.83617401123047, + "loss": 0.1205, + "objective": 0.09809862077236176, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.637499988079071, + "regularize": 0.09809862077236176, + "step": 590 + }, + { + "dpo_loss": 0.5538628101348877, + "epoch": 3.372697213037317, + "grad_norm": 17.81052400873953, + "learning_rate": 1.43447515004208e-07, + "logits": -1.613613247871399, + "logps": -92.85578155517578, + "loss": 0.1191, + "objective": 0.12334737926721573, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.637499988079071, + "regularize": 0.12334737926721573, + "step": 595 + }, + { + "dpo_loss": 0.5357978940010071, + "epoch": 3.4010392064241852, + "grad_norm": 18.626853535104544, + "learning_rate": 1.3898334684855645e-07, + "logits": -1.624743938446045, + "logps": -92.40316009521484, + "loss": 0.1149, + "objective": 0.13463754951953888, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6458333134651184, + "regularize": 0.13463754951953888, + "step": 600 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 0.6852067112922668, + "eval_logits": -1.71807062625885, + "eval_logps": -99.92455291748047, + "eval_loss": 0.41705650091171265, + "eval_objective": 0.41811424493789673, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.41811424493789673, + "eval_runtime": 259.0859, + "eval_samples_per_second": 22.348, + "eval_steps_per_second": 0.934, + "step": 600 + }, + { + "dpo_loss": 0.5652448534965515, + "epoch": 3.4293811998110533, + "grad_norm": 17.958926430591173, + "learning_rate": 1.3456284669124157e-07, + "logits": -1.6740020513534546, + "logps": -94.55862426757812, + "loss": 0.1179, + "objective": 0.11572790890932083, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.625, + "regularize": 0.11572788655757904, + "step": 605 + }, + { + "dpo_loss": 0.5543821454048157, + "epoch": 3.4577231931979218, + "grad_norm": 17.326848783729876, + "learning_rate": 1.301877533199859e-07, + "logits": -1.6315828561782837, + "logps": -92.49845886230469, + "loss": 0.1149, + "objective": 0.1067105308175087, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6499999761581421, + "regularize": 0.1067105159163475, + "step": 610 + }, + { + "dpo_loss": 0.5443283915519714, + "epoch": 3.48606518658479, + "grad_norm": 16.84586393500809, + "learning_rate": 1.2585978766191724e-07, + "logits": -1.664933681488037, + "logps": -93.27455139160156, + "loss": 0.1142, + "objective": 0.10945113748311996, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.5958333611488342, + "regularize": 0.10945113748311996, + "step": 615 + }, + { + "dpo_loss": 0.5419160723686218, + "epoch": 3.514407179971658, + "grad_norm": 17.32874521556865, + "learning_rate": 1.2158065210664848e-07, + "logits": -1.5332224369049072, + "logps": -92.34308624267578, + "loss": 0.1203, + "objective": 0.12084861099720001, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6166666746139526, + "regularize": 0.12084860354661942, + "step": 620 + }, + { + "dpo_loss": 0.5541211366653442, + "epoch": 3.5427491733585263, + "grad_norm": 17.48530471086995, + "learning_rate": 1.1735202983664802e-07, + "logits": -1.6171096563339233, + "logps": -91.3125991821289, + "loss": 0.1178, + "objective": 0.11125477403402328, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6416666507720947, + "regularize": 0.11125477403402328, + "step": 625 + }, + { + "dpo_loss": 0.5698776245117188, + "epoch": 3.5710911667453944, + "grad_norm": 17.864701578880954, + "learning_rate": 1.1317558416516696e-07, + "logits": -1.697689175605774, + "logps": -91.67240905761719, + "loss": 0.1261, + "objective": 0.13253255188465118, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6083333492279053, + "regularize": 0.13253255188465118, + "step": 630 + }, + { + "dpo_loss": 0.5599467158317566, + "epoch": 3.5994331601322624, + "grad_norm": 17.33519253157568, + "learning_rate": 1.090529578819799e-07, + "logits": -1.6461411714553833, + "logps": -91.57376098632812, + "loss": 0.1157, + "objective": 0.10732007026672363, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6208333373069763, + "regularize": 0.10732006281614304, + "step": 635 + }, + { + "dpo_loss": 0.540539562702179, + "epoch": 3.627775153519131, + "grad_norm": 17.025667462047203, + "learning_rate": 1.0498577260720048e-07, + "logits": -1.5717778205871582, + "logps": -93.14022827148438, + "loss": 0.1146, + "objective": 0.13500064611434937, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.574999988079071, + "regularize": 0.13500064611434937, + "step": 640 + }, + { + "dpo_loss": 0.5469278693199158, + "epoch": 3.656117146905999, + "grad_norm": 17.536092815770388, + "learning_rate": 1.0097562815342214e-07, + "logits": -1.6058826446533203, + "logps": -90.76680755615234, + "loss": 0.1144, + "objective": 0.1191474050283432, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6291666626930237, + "regularize": 0.1191474050283432, + "step": 645 + }, + { + "dpo_loss": 0.5611483454704285, + "epoch": 3.6844591402928675, + "grad_norm": 17.646206320924833, + "learning_rate": 9.702410189643836e-08, + "logits": -1.6121342182159424, + "logps": -92.83375549316406, + "loss": 0.1108, + "objective": 0.09943919628858566, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6083333492279053, + "regularize": 0.09943918883800507, + "step": 650 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 0.6852837800979614, + "eval_logits": -1.7315040826797485, + "eval_logps": -99.91177368164062, + "eval_loss": 0.41784536838531494, + "eval_objective": 0.41884875297546387, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5247933864593506, + "eval_regularize": 0.41884875297546387, + "eval_runtime": 259.4431, + "eval_samples_per_second": 22.317, + "eval_steps_per_second": 0.933, + "step": 650 + }, + { + "dpo_loss": 0.5510907769203186, + "epoch": 3.7128011336797355, + "grad_norm": 17.84028807284025, + "learning_rate": 9.313274815478698e-08, + "logits": -1.6280105113983154, + "logps": -92.27388763427734, + "loss": 0.117, + "objective": 0.10249165445566177, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6291666626930237, + "regularize": 0.10249165445566177, + "step": 655 + }, + { + "dpo_loss": 0.5551621913909912, + "epoch": 3.7411431270666036, + "grad_norm": 17.863904309670215, + "learning_rate": 8.930309757836516e-08, + "logits": -1.7605994939804077, + "logps": -92.74076080322266, + "loss": 0.1162, + "objective": 0.11682406812906265, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6625000238418579, + "regularize": 0.11682406812906265, + "step": 660 + }, + { + "dpo_loss": 0.5553780198097229, + "epoch": 3.769485120453472, + "grad_norm": 17.287090327509993, + "learning_rate": 8.553665654635342e-08, + "logits": -1.6500779390335083, + "logps": -92.00687408447266, + "loss": 0.116, + "objective": 0.11367592960596085, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6541666388511658, + "regularize": 0.11367591470479965, + "step": 665 + }, + { + "dpo_loss": 0.5561904311180115, + "epoch": 3.79782711384034, + "grad_norm": 18.08647866984471, + "learning_rate": 8.183490657468686e-08, + "logits": -1.7430044412612915, + "logps": -92.36637878417969, + "loss": 0.1153, + "objective": 0.13086958229541779, + "ranking_idealized": 0.7333333492279053, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.7124999761581421, + "regularize": 0.13086958229541779, + "step": 670 + }, + { + "dpo_loss": 0.5525475740432739, + "epoch": 3.826169107227208, + "grad_norm": 17.678219555630616, + "learning_rate": 7.819930373330669e-08, + "logits": -1.6892848014831543, + "logps": -91.58055114746094, + "loss": 0.1165, + "objective": 0.10088498890399933, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6416666507720947, + "regularize": 0.10088498145341873, + "step": 675 + }, + { + "dpo_loss": 0.5708147883415222, + "epoch": 3.8545111006140766, + "grad_norm": 17.208804705028182, + "learning_rate": 7.463127807341966e-08, + "logits": -1.6462949514389038, + "logps": -92.41487884521484, + "loss": 0.1181, + "objective": 0.11868777871131897, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6208333373069763, + "regularize": 0.11868777871131897, + "step": 680 + }, + { + "dpo_loss": 0.5442604422569275, + "epoch": 3.8828530940009447, + "grad_norm": 18.600166194890996, + "learning_rate": 7.113223306499336e-08, + "logits": -1.7259678840637207, + "logps": -91.63528442382812, + "loss": 0.1127, + "objective": 0.10472600162029266, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6499999761581421, + "regularize": 0.10472600162029266, + "step": 685 + }, + { + "dpo_loss": 0.5496628284454346, + "epoch": 3.9111950873878127, + "grad_norm": 17.80496655704031, + "learning_rate": 6.770354504470574e-08, + "logits": -1.6540542840957642, + "logps": -90.78262329101562, + "loss": 0.1164, + "objective": 0.10735266655683517, + "ranking_idealized": 0.7041666507720947, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6875, + "regularize": 0.10735265165567398, + "step": 690 + }, + { + "dpo_loss": 0.5639461874961853, + "epoch": 3.9395370807746812, + "grad_norm": 16.492816826616206, + "learning_rate": 6.434656267456842e-08, + "logits": -1.6047898530960083, + "logps": -92.38011932373047, + "loss": 0.1193, + "objective": 0.12910698354244232, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.5833333134651184, + "regularize": 0.12910698354244232, + "step": 695 + }, + { + "dpo_loss": 0.5509793162345886, + "epoch": 3.9678790741615493, + "grad_norm": 19.17114576937693, + "learning_rate": 6.106260641143546e-08, + "logits": -1.6564711332321167, + "logps": -92.65071868896484, + "loss": 0.1146, + "objective": 0.1030283123254776, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5958333611488342, + "regularize": 0.103028304874897, + "step": 700 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 0.6854080557823181, + "eval_logits": -1.7319272756576538, + "eval_logps": -99.89824676513672, + "eval_loss": 0.4175797998905182, + "eval_objective": 0.4186650514602661, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.4186650514602661, + "eval_runtime": 258.7065, + "eval_samples_per_second": 22.381, + "eval_steps_per_second": 0.935, + "step": 700 + }, + { + "dpo_loss": 0.5571620464324951, + "epoch": 3.9962210675484178, + "grad_norm": 18.74884098276965, + "learning_rate": 5.7852967987606e-08, + "logits": -1.554320216178894, + "logps": -90.9109878540039, + "loss": 0.1135, + "objective": 0.12702669203281403, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6583333611488342, + "regularize": 0.12702666223049164, + "step": 705 + }, + { + "dpo_loss": 0.5528541803359985, + "epoch": 4.024563060935286, + "grad_norm": 17.40621172754528, + "learning_rate": 5.471890990272665e-08, + "logits": -1.6468113660812378, + "logps": -92.61128997802734, + "loss": 0.1135, + "objective": 0.1373264044523239, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.612500011920929, + "regularize": 0.13732638955116272, + "step": 710 + }, + { + "dpo_loss": 0.5556226968765259, + "epoch": 4.052905054322154, + "grad_norm": 18.812501686123863, + "learning_rate": 5.166166492719124e-08, + "logits": -1.6049120426177979, + "logps": -92.74799346923828, + "loss": 0.1035, + "objective": 0.10493499785661697, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6458333134651184, + "regularize": 0.10493497550487518, + "step": 715 + }, + { + "dpo_loss": 0.5424375534057617, + "epoch": 4.081247047709022, + "grad_norm": 17.435339866299028, + "learning_rate": 4.868243561723534e-08, + "logits": -1.511703372001648, + "logps": -94.71248626708984, + "loss": 0.0978, + "objective": 0.09467672556638718, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6166666746139526, + "regularize": 0.09467671811580658, + "step": 720 + }, + { + "dpo_loss": 0.5652304291725159, + "epoch": 4.109589041095891, + "grad_norm": 17.507850551258127, + "learning_rate": 4.578239384191529e-08, + "logits": -1.6384118795394897, + "logps": -92.1180191040039, + "loss": 0.1076, + "objective": 0.09941933304071426, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.637499988079071, + "regularize": 0.09941932559013367, + "step": 725 + }, + { + "dpo_loss": 0.5497789978981018, + "epoch": 4.137931034482759, + "grad_norm": 18.803004868454263, + "learning_rate": 4.296268032215733e-08, + "logits": -1.7138111591339111, + "logps": -91.83662414550781, + "loss": 0.1086, + "objective": 0.10822432488203049, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6208333373069763, + "regularize": 0.10822432488203049, + "step": 730 + }, + { + "dpo_loss": 0.55525803565979, + "epoch": 4.166273027869627, + "grad_norm": 18.177348382836357, + "learning_rate": 4.022440418205944e-08, + "logits": -1.6232236623764038, + "logps": -93.14463806152344, + "loss": 0.1028, + "objective": 0.10451411455869675, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6541666388511658, + "regularize": 0.10451411455869675, + "step": 735 + }, + { + "dpo_loss": 0.535234808921814, + "epoch": 4.194615021256495, + "grad_norm": 17.133543410858152, + "learning_rate": 3.756864251262143e-08, + "logits": -1.610323190689087, + "logps": -93.36137390136719, + "loss": 0.1062, + "objective": 0.10550294071435928, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6541666388511658, + "regularize": 0.10550292581319809, + "step": 740 + }, + { + "dpo_loss": 0.546442449092865, + "epoch": 4.222957014643363, + "grad_norm": 18.78547392108143, + "learning_rate": 3.4996439948074855e-08, + "logits": -1.6879092454910278, + "logps": -90.12301635742188, + "loss": 0.1001, + "objective": 0.10297367721796036, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6208333373069763, + "regularize": 0.10297366231679916, + "step": 745 + }, + { + "dpo_loss": 0.5416663289070129, + "epoch": 4.251299008030231, + "grad_norm": 19.462231175744662, + "learning_rate": 3.250880825498026e-08, + "logits": -1.8104737997055054, + "logps": -92.32807922363281, + "loss": 0.0986, + "objective": 0.10099396854639053, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6291666626930237, + "regularize": 0.10099395364522934, + "step": 750 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 0.6853212714195251, + "eval_logits": -1.7322306632995605, + "eval_logps": -99.86943054199219, + "eval_loss": 0.41747406125068665, + "eval_objective": 0.41828420758247375, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.41828420758247375, + "eval_runtime": 258.964, + "eval_samples_per_second": 22.358, + "eval_steps_per_second": 0.934, + "step": 750 + }, + { + "dpo_loss": 0.5468146800994873, + "epoch": 4.2796410014171, + "grad_norm": 18.270752967286892, + "learning_rate": 3.010672593425209e-08, + "logits": -1.7138711214065552, + "logps": -92.11996459960938, + "loss": 0.1147, + "objective": 0.10049024224281311, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6458333134651184, + "regularize": 0.10049023479223251, + "step": 755 + }, + { + "dpo_loss": 0.5455428957939148, + "epoch": 4.307982994803968, + "grad_norm": 17.285027330879423, + "learning_rate": 2.7791137836269158e-08, + "logits": -1.6757961511611938, + "logps": -93.05391693115234, + "loss": 0.0961, + "objective": 0.10086000710725784, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6833333373069763, + "regularize": 0.10086000710725784, + "step": 760 + }, + { + "dpo_loss": 0.5395826101303101, + "epoch": 4.336324988190836, + "grad_norm": 17.680760382133624, + "learning_rate": 2.556295478922116e-08, + "logits": -1.7200431823730469, + "logps": -93.1734619140625, + "loss": 0.1053, + "objective": 0.12091321498155594, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6833333373069763, + "regularize": 0.12091320008039474, + "step": 765 + }, + { + "dpo_loss": 0.5380468964576721, + "epoch": 4.364666981577704, + "grad_norm": 17.361908211383366, + "learning_rate": 2.3423053240837514e-08, + "logits": -1.577264428138733, + "logps": -91.18030548095703, + "loss": 0.1064, + "objective": 0.11028440296649933, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.637499988079071, + "regularize": 0.11028438061475754, + "step": 770 + }, + { + "dpo_loss": 0.5494747161865234, + "epoch": 4.393008974964572, + "grad_norm": 16.640977636984772, + "learning_rate": 2.137227491364016e-08, + "logits": -1.627792239189148, + "logps": -91.97000885009766, + "loss": 0.1067, + "objective": 0.10595239698886871, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6333333253860474, + "regularize": 0.10595235228538513, + "step": 775 + }, + { + "dpo_loss": 0.5575358867645264, + "epoch": 4.42135096835144, + "grad_norm": 17.112345268128863, + "learning_rate": 1.9411426473854687e-08, + "logits": -1.693690538406372, + "logps": -90.5418472290039, + "loss": 0.1004, + "objective": 0.10788667947053909, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6208333373069763, + "regularize": 0.10788667947053909, + "step": 780 + }, + { + "dpo_loss": 0.5676646828651428, + "epoch": 4.449692961738309, + "grad_norm": 16.657419373072543, + "learning_rate": 1.7541279214111275e-08, + "logits": -1.7215303182601929, + "logps": -90.63499450683594, + "loss": 0.1128, + "objective": 0.1177934780716896, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.612500011920929, + "regularize": 0.1177934780716896, + "step": 785 + }, + { + "dpo_loss": 0.539345920085907, + "epoch": 4.478034955125177, + "grad_norm": 17.089680176209615, + "learning_rate": 1.57625687500596e-08, + "logits": -1.6345340013504028, + "logps": -93.4063720703125, + "loss": 0.0998, + "objective": 0.10817180573940277, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6916666626930237, + "regularize": 0.10817176848649979, + "step": 790 + }, + { + "dpo_loss": 0.547528088092804, + "epoch": 4.506376948512045, + "grad_norm": 17.320194779122446, + "learning_rate": 1.4075994731016894e-08, + "logits": -1.5627334117889404, + "logps": -93.30286407470703, + "loss": 0.1058, + "objective": 0.102629154920578, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6291666626930237, + "regularize": 0.10262913256883621, + "step": 795 + }, + { + "dpo_loss": 0.5486911535263062, + "epoch": 4.534718941898913, + "grad_norm": 17.9229448629881, + "learning_rate": 1.2482220564763667e-08, + "logits": -1.5870776176452637, + "logps": -92.80538940429688, + "loss": 0.1042, + "objective": 0.11460768431425095, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6499999761581421, + "regularize": 0.11460768431425095, + "step": 800 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 0.6852768659591675, + "eval_logits": -1.7317209243774414, + "eval_logps": -99.85995483398438, + "eval_loss": 0.4175398349761963, + "eval_objective": 0.4183157980442047, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.4183157980442047, + "eval_runtime": 259.0856, + "eval_samples_per_second": 22.348, + "eval_steps_per_second": 0.934, + "step": 800 + }, + { + "dpo_loss": 0.5488670468330383, + "epoch": 4.563060935285781, + "grad_norm": 18.799592478883184, + "learning_rate": 1.0981873156594379e-08, + "logits": -1.627816081047058, + "logps": -91.32179260253906, + "loss": 0.1001, + "objective": 0.10988225042819977, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.6041666865348816, + "regularize": 0.10988224297761917, + "step": 805 + }, + { + "dpo_loss": 0.5488799810409546, + "epoch": 4.59140292867265, + "grad_norm": 16.566296936758206, + "learning_rate": 9.575542662726754e-09, + "logits": -1.7243562936782837, + "logps": -91.10765075683594, + "loss": 0.0996, + "objective": 0.08869278430938721, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5833333134651184, + "regularize": 0.08869277685880661, + "step": 810 + }, + { + "dpo_loss": 0.5502530336380005, + "epoch": 4.619744922059518, + "grad_norm": 16.948787644578637, + "learning_rate": 8.263782258165819e-09, + "logits": -1.5700196027755737, + "logps": -92.37843322753906, + "loss": 0.0991, + "objective": 0.07977009564638138, + "ranking_idealized": 0.7250000238418579, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.6958333253860474, + "regularize": 0.07977008074522018, + "step": 815 + }, + { + "dpo_loss": 0.5550402402877808, + "epoch": 4.648086915446386, + "grad_norm": 21.501267763535818, + "learning_rate": 7.047107919114586e-09, + "logits": -1.6636712551116943, + "logps": -92.61454010009766, + "loss": 0.0982, + "objective": 0.08367303013801575, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6708333492279053, + "regularize": 0.08367302268743515, + "step": 820 + }, + { + "dpo_loss": 0.5588962435722351, + "epoch": 4.6764289088332545, + "grad_norm": 16.315710057694485, + "learning_rate": 5.925998220016659e-09, + "logits": -1.5499807596206665, + "logps": -90.22130584716797, + "loss": 0.1018, + "objective": 0.09301813691854477, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5833333134651184, + "regularize": 0.09301812201738358, + "step": 825 + }, + { + "dpo_loss": 0.5498708486557007, + "epoch": 4.7047709022201225, + "grad_norm": 17.748669049129045, + "learning_rate": 4.9008941453107525e-09, + "logits": -1.7388263940811157, + "logps": -92.17695617675781, + "loss": 0.1092, + "objective": 0.11468993872404099, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6416666507720947, + "regularize": 0.1146899089217186, + "step": 830 + }, + { + "dpo_loss": 0.5327169299125671, + "epoch": 4.733112895606991, + "grad_norm": 17.389468155390862, + "learning_rate": 3.9721989159709754e-09, + "logits": -1.6580873727798462, + "logps": -92.22929382324219, + "loss": 0.1, + "objective": 0.09779965132474899, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6041666865348816, + "regularize": 0.0977996364235878, + "step": 835 + }, + { + "dpo_loss": 0.5324665904045105, + "epoch": 4.7614548889938595, + "grad_norm": 18.51697686947663, + "learning_rate": 3.140277830901428e-09, + "logits": -1.6570351123809814, + "logps": -92.75865173339844, + "loss": 0.1079, + "objective": 0.11856434494256973, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6499999761581421, + "regularize": 0.11856433004140854, + "step": 840 + }, + { + "dpo_loss": 0.560818612575531, + "epoch": 4.7897968823807275, + "grad_norm": 17.858581136510683, + "learning_rate": 2.4054581232470785e-09, + "logits": -1.6901015043258667, + "logps": -92.13179016113281, + "loss": 0.1067, + "objective": 0.10368030518293381, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.6208333373069763, + "regularize": 0.10368029028177261, + "step": 845 + }, + { + "dpo_loss": 0.5496495366096497, + "epoch": 4.818138875767596, + "grad_norm": 17.640915580271592, + "learning_rate": 1.7680288316779256e-09, + "logits": -1.6190950870513916, + "logps": -90.9464340209961, + "loss": 0.103, + "objective": 0.10453298687934875, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6333333253860474, + "regularize": 0.10453297942876816, + "step": 850 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 0.6852567791938782, + "eval_logits": -1.732380986213684, + "eval_logps": -99.89720153808594, + "eval_loss": 0.4175875782966614, + "eval_objective": 0.4183763563632965, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.4183763563632965, + "eval_runtime": 259.1903, + "eval_samples_per_second": 22.339, + "eval_steps_per_second": 0.934, + "step": 850 + }, + { + "dpo_loss": 0.5584205389022827, + "epoch": 4.846480869154464, + "grad_norm": 17.56984774908714, + "learning_rate": 1.2282406866966078e-09, + "logits": -1.6185228824615479, + "logps": -91.83565521240234, + "loss": 0.0974, + "objective": 0.09289266169071198, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.675000011920929, + "regularize": 0.09289265424013138, + "step": 855 + }, + { + "dpo_loss": 0.5458131432533264, + "epoch": 4.874822862541333, + "grad_norm": 17.337457908328606, + "learning_rate": 7.863060120144316e-10, + "logits": -1.5824497938156128, + "logps": -91.32083892822266, + "loss": 0.0959, + "objective": 0.11178465932607651, + "ranking_idealized": 0.7166666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6791666746139526, + "regularize": 0.11178465187549591, + "step": 860 + }, + { + "dpo_loss": 0.555813729763031, + "epoch": 4.903164855928201, + "grad_norm": 16.568497702615847, + "learning_rate": 4.4239864103465254e-10, + "logits": -1.6553268432617188, + "logps": -90.40623474121094, + "loss": 0.1003, + "objective": 0.1271737664937973, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6041666865348816, + "regularize": 0.1271737515926361, + "step": 865 + }, + { + "dpo_loss": 0.5469813942909241, + "epoch": 4.931506849315069, + "grad_norm": 17.67813635168232, + "learning_rate": 1.966538484758362e-10, + "logits": -1.7142003774642944, + "logps": -92.42487335205078, + "loss": 0.1053, + "objective": 0.10629518330097198, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6625000238418579, + "regularize": 0.1062951609492302, + "step": 870 + }, + { + "dpo_loss": 0.5456808805465698, + "epoch": 4.959848842701937, + "grad_norm": 17.372715214830695, + "learning_rate": 4.9168297161839014e-11, + "logits": -1.6318602561950684, + "logps": -92.3662109375, + "loss": 0.1017, + "objective": 0.08666170388460159, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6541666388511658, + "regularize": 0.086661696434021, + "step": 875 + }, + { + "dpo_loss": 0.5451498627662659, + "epoch": 4.988190836088805, + "grad_norm": 17.608890670600516, + "learning_rate": 0.0, + "logits": -1.7033004760742188, + "logps": -92.65689849853516, + "loss": 0.0938, + "objective": 0.09012699872255325, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6541666388511658, + "regularize": 0.09012699127197266, + "step": 880 + }, + { + "epoch": 4.988190836088805, + "step": 880, + "total_flos": 0.0, + "train_loss": 0.19899855256080629, + "train_runtime": 35117.4941, + "train_samples_per_second": 7.233, + "train_steps_per_second": 0.025 + } + ], + "logging_steps": 5, + "max_steps": 880, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}