diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,2969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 880, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.005668398677373642, + "grad_norm": 13.413590921123136, + "learning_rate": 1.1363636363636363e-08, + "logits": -1.3147305250167847, + "logps": -88.0877456665039, + "loss": 0.4113, + "objective": 0.41588976979255676, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.41588976979255676, + "step": 1 + }, + { + "dpo_loss": 0.6931466460227966, + "epoch": 0.02834199338686821, + "grad_norm": 13.328732837688287, + "learning_rate": 5.6818181818181815e-08, + "logits": -1.3678570985794067, + "logps": -84.42396545410156, + "loss": 0.413, + "objective": 0.37554118037223816, + "ranking_idealized": 0.6510416865348816, + "ranking_idealized_expo": 0.5572916865348816, + "ranking_simple": 0.546875, + "regularize": 0.37554118037223816, + "step": 5 + }, + { + "dpo_loss": 0.6923526525497437, + "epoch": 0.05668398677373642, + "grad_norm": 12.646793869664034, + "learning_rate": 1.1363636363636363e-07, + "logits": -1.4465404748916626, + "logps": -83.2779541015625, + "loss": 0.4172, + "objective": 0.4415889084339142, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.512499988079071, + "regularize": 0.4415889084339142, + "step": 10 + }, + { + "dpo_loss": 0.6924737095832825, + "epoch": 0.08502598016060463, + "grad_norm": 12.521876797779205, + "learning_rate": 1.7045454545454543e-07, + "logits": -1.4127671718597412, + "logps": -83.22444152832031, + "loss": 0.4245, + "objective": 0.41116636991500854, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5083333253860474, + "regularize": 0.41116636991500854, + "step": 15 + }, + { + "dpo_loss": 0.6923013925552368, + "epoch": 0.11336797354747284, + "grad_norm": 13.477735980439727, + "learning_rate": 2.2727272727272726e-07, + "logits": -1.4050496816635132, + "logps": -83.70260620117188, + "loss": 0.4131, + "objective": 0.40299364924430847, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5375000238418579, + "regularize": 0.40299364924430847, + "step": 20 + }, + { + "dpo_loss": 0.6886129975318909, + "epoch": 0.14170996693434104, + "grad_norm": 13.118957044182272, + "learning_rate": 2.840909090909091e-07, + "logits": -1.4580955505371094, + "logps": -83.24189758300781, + "loss": 0.4088, + "objective": 0.4266737103462219, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.5, + "regularize": 0.4266737103462219, + "step": 25 + }, + { + "dpo_loss": 0.6878851652145386, + "epoch": 0.17005196032120926, + "grad_norm": 13.707804900141507, + "learning_rate": 3.4090909090909085e-07, + "logits": -1.4379478693008423, + "logps": -83.61381530761719, + "loss": 0.4079, + "objective": 0.41071560978889465, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5333333611488342, + "regularize": 0.41071560978889465, + "step": 30 + }, + { + "dpo_loss": 0.683499276638031, + "epoch": 0.19839395370807747, + "grad_norm": 13.417059102073019, + "learning_rate": 3.977272727272727e-07, + "logits": -1.4155864715576172, + "logps": -82.38362121582031, + "loss": 0.4024, + "objective": 0.38590285181999207, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.5166666507720947, + "regularize": 0.38590285181999207, + "step": 35 + }, + { + "dpo_loss": 0.6810929775238037, + "epoch": 0.22673594709494568, + "grad_norm": 14.466457742258758, + "learning_rate": 4.545454545454545e-07, + "logits": -1.4129302501678467, + "logps": -83.20220184326172, + "loss": 0.4137, + "objective": 0.4313904047012329, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.512499988079071, + "regularize": 0.4313904047012329, + "step": 40 + }, + { + "dpo_loss": 0.676669180393219, + "epoch": 0.25507794048181387, + "grad_norm": 13.031938578727805, + "learning_rate": 5.113636363636363e-07, + "logits": -1.5500620603561401, + "logps": -84.26518249511719, + "loss": 0.4102, + "objective": 0.4602474272251129, + "ranking_idealized": 0.7208333611488342, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5416666865348816, + "regularize": 0.4602474272251129, + "step": 45 + }, + { + "dpo_loss": 0.6746354699134827, + "epoch": 0.2834199338686821, + "grad_norm": 12.780839075585325, + "learning_rate": 5.681818181818182e-07, + "logits": -1.4677213430404663, + "logps": -81.97610473632812, + "loss": 0.4009, + "objective": 0.38121363520622253, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.42500001192092896, + "ranking_simple": 0.46666666865348816, + "regularize": 0.38121363520622253, + "step": 50 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.6905611157417297, + "eval_logits": -1.5066354274749756, + "eval_logps": -90.34810638427734, + "eval_loss": 0.4076729416847229, + "eval_objective": 0.4091368019580841, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5144628286361694, + "eval_regularize": 0.4091368019580841, + "eval_runtime": 260.5812, + "eval_samples_per_second": 22.22, + "eval_steps_per_second": 0.929, + "step": 50 + }, + { + "dpo_loss": 0.6733829975128174, + "epoch": 0.3117619272555503, + "grad_norm": 14.366169874458807, + "learning_rate": 6.249999999999999e-07, + "logits": -1.5239266157150269, + "logps": -83.96293640136719, + "loss": 0.3923, + "objective": 0.3916258215904236, + "ranking_idealized": 0.7124999761581421, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5458333492279053, + "regularize": 0.3916258215904236, + "step": 55 + }, + { + "dpo_loss": 0.6722519993782043, + "epoch": 0.3401039206424185, + "grad_norm": 13.825071963348561, + "learning_rate": 6.818181818181817e-07, + "logits": -1.5167808532714844, + "logps": -84.09709167480469, + "loss": 0.3826, + "objective": 0.36959540843963623, + "ranking_idealized": 0.7041666507720947, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5458333492279053, + "regularize": 0.36959540843963623, + "step": 60 + }, + { + "dpo_loss": 0.6677471995353699, + "epoch": 0.3684459140292867, + "grad_norm": 14.718501577233726, + "learning_rate": 7.386363636363636e-07, + "logits": -1.4766509532928467, + "logps": -85.11907196044922, + "loss": 0.3687, + "objective": 0.3556104004383087, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5041666626930237, + "regularize": 0.3556104004383087, + "step": 65 + }, + { + "dpo_loss": 0.6649280786514282, + "epoch": 0.39678790741615494, + "grad_norm": 14.263348001605417, + "learning_rate": 7.954545454545454e-07, + "logits": -1.564154028892517, + "logps": -86.45026397705078, + "loss": 0.3702, + "objective": 0.4041551947593689, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.5249999761581421, + "regularize": 0.4041551947593689, + "step": 70 + }, + { + "dpo_loss": 0.664714515209198, + "epoch": 0.42512990080302315, + "grad_norm": 14.983540223476671, + "learning_rate": 8.522727272727273e-07, + "logits": -1.589759349822998, + "logps": -86.56982421875, + "loss": 0.375, + "objective": 0.394586443901062, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5958333611488342, + "regularize": 0.394586443901062, + "step": 75 + }, + { + "dpo_loss": 0.6637415289878845, + "epoch": 0.45347189418989137, + "grad_norm": 15.092974710602856, + "learning_rate": 9.09090909090909e-07, + "logits": -1.6327601671218872, + "logps": -86.078857421875, + "loss": 0.3592, + "objective": 0.36432406306266785, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.550000011920929, + "regularize": 0.36432406306266785, + "step": 80 + }, + { + "dpo_loss": 0.6607963442802429, + "epoch": 0.4818138875767596, + "grad_norm": 13.665881861145971, + "learning_rate": 9.65909090909091e-07, + "logits": -1.6192591190338135, + "logps": -84.15817260742188, + "loss": 0.3518, + "objective": 0.3733659088611603, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5375000238418579, + "regularize": 0.3733659088611603, + "step": 85 + }, + { + "dpo_loss": 0.6554233431816101, + "epoch": 0.5101558809636277, + "grad_norm": 14.760661346851347, + "learning_rate": 9.999842657116664e-07, + "logits": -1.4328864812850952, + "logps": -87.27013397216797, + "loss": 0.3509, + "objective": 0.3566504418849945, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.5, + "regularize": 0.3566504418849945, + "step": 90 + }, + { + "dpo_loss": 0.6343147158622742, + "epoch": 0.538497874350496, + "grad_norm": 14.360399578992759, + "learning_rate": 9.998072663403656e-07, + "logits": -1.518968105316162, + "logps": -87.2616958618164, + "loss": 0.3323, + "objective": 0.3436143696308136, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5958333611488342, + "regularize": 0.3436143696308136, + "step": 95 + }, + { + "dpo_loss": 0.6289714574813843, + "epoch": 0.5668398677373642, + "grad_norm": 14.235586512699614, + "learning_rate": 9.99433669591504e-07, + "logits": -1.6142817735671997, + "logps": -86.73899841308594, + "loss": 0.3456, + "objective": 0.3731040954589844, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5541666746139526, + "regularize": 0.3731040954589844, + "step": 100 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.686673104763031, + "eval_logits": -1.6104196310043335, + "eval_logps": -92.6246109008789, + "eval_loss": 0.4036862850189209, + "eval_objective": 0.40806055068969727, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.40806055068969727, + "eval_runtime": 260.2239, + "eval_samples_per_second": 22.25, + "eval_steps_per_second": 0.93, + "step": 100 + }, + { + "dpo_loss": 0.6265436410903931, + "epoch": 0.5951818611242324, + "grad_norm": 17.534314390535563, + "learning_rate": 9.988636224180095e-07, + "logits": -1.4830114841461182, + "logps": -87.12000274658203, + "loss": 0.336, + "objective": 0.33996376395225525, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.5791666507720947, + "regularize": 0.33996376395225525, + "step": 105 + }, + { + "dpo_loss": 0.6268933415412903, + "epoch": 0.6235238545111006, + "grad_norm": 17.309478536368452, + "learning_rate": 9.980973490458728e-07, + "logits": -1.6955339908599854, + "logps": -86.52151489257812, + "loss": 0.3327, + "objective": 0.3487900495529175, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.5291666388511658, + "regularize": 0.3487900495529175, + "step": 110 + }, + { + "dpo_loss": 0.6324852108955383, + "epoch": 0.6518658478979689, + "grad_norm": 16.226500081559077, + "learning_rate": 9.971351508859486e-07, + "logits": -1.6517375707626343, + "logps": -85.2242660522461, + "loss": 0.3154, + "objective": 0.312590092420578, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.512499988079071, + "regularize": 0.312590092420578, + "step": 115 + }, + { + "dpo_loss": 0.6312325596809387, + "epoch": 0.680207841284837, + "grad_norm": 15.958846654193053, + "learning_rate": 9.959774064153975e-07, + "logits": -1.5981483459472656, + "logps": -87.78892517089844, + "loss": 0.3121, + "objective": 0.2821680009365082, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5666666626930237, + "regularize": 0.2821680009365082, + "step": 120 + }, + { + "dpo_loss": 0.6170183420181274, + "epoch": 0.7085498346717053, + "grad_norm": 14.799813931917887, + "learning_rate": 9.94624571028813e-07, + "logits": -1.5694143772125244, + "logps": -88.2162094116211, + "loss": 0.3079, + "objective": 0.31248998641967773, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6083333492279053, + "regularize": 0.31248998641967773, + "step": 125 + }, + { + "dpo_loss": 0.630465567111969, + "epoch": 0.7368918280585735, + "grad_norm": 14.141148872850344, + "learning_rate": 9.930771768590933e-07, + "logits": -1.7086971998214722, + "logps": -85.12911987304688, + "loss": 0.3053, + "objective": 0.30258244276046753, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.4541666805744171, + "ranking_simple": 0.5166666507720947, + "regularize": 0.30258244276046753, + "step": 130 + }, + { + "dpo_loss": 0.6269639134407043, + "epoch": 0.7652338214454416, + "grad_norm": 14.651813715107076, + "learning_rate": 9.91335832568129e-07, + "logits": -1.661932110786438, + "logps": -86.05510711669922, + "loss": 0.2876, + "objective": 0.298209547996521, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.5666666626930237, + "regularize": 0.2982095181941986, + "step": 135 + }, + { + "dpo_loss": 0.6221628785133362, + "epoch": 0.7935758148323099, + "grad_norm": 15.239526231712386, + "learning_rate": 9.894012231073895e-07, + "logits": -1.550681710243225, + "logps": -86.62859344482422, + "loss": 0.2845, + "objective": 0.30004462599754333, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5458333492279053, + "regularize": 0.30004462599754333, + "step": 140 + }, + { + "dpo_loss": 0.608003556728363, + "epoch": 0.821917808219178, + "grad_norm": 14.863021825503232, + "learning_rate": 9.872741094484964e-07, + "logits": -1.6267261505126953, + "logps": -87.43079376220703, + "loss": 0.2862, + "objective": 0.29867058992385864, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5375000238418579, + "regularize": 0.29867058992385864, + "step": 145 + }, + { + "dpo_loss": 0.6166930794715881, + "epoch": 0.8502598016060463, + "grad_norm": 14.76588996791471, + "learning_rate": 9.849553282839024e-07, + "logits": -1.587580919265747, + "logps": -84.92633819580078, + "loss": 0.2786, + "objective": 0.27978453040122986, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.5916666388511658, + "regularize": 0.27978453040122986, + "step": 150 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.6873137950897217, + "eval_logits": -1.6473405361175537, + "eval_logps": -94.62364196777344, + "eval_loss": 0.40614137053489685, + "eval_objective": 0.41305893659591675, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.41305893659591675, + "eval_runtime": 260.0727, + "eval_samples_per_second": 22.263, + "eval_steps_per_second": 0.931, + "step": 150 + }, + { + "dpo_loss": 0.605694591999054, + "epoch": 0.8786017949929145, + "grad_norm": 14.394381519568077, + "learning_rate": 9.824457916977784e-07, + "logits": -1.621727466583252, + "logps": -86.20561218261719, + "loss": 0.2759, + "objective": 0.2766547203063965, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6041666865348816, + "regularize": 0.2766547203063965, + "step": 155 + }, + { + "dpo_loss": 0.5983697175979614, + "epoch": 0.9069437883797827, + "grad_norm": 15.160839164653526, + "learning_rate": 9.797464868072486e-07, + "logits": -1.5286997556686401, + "logps": -89.5051498413086, + "loss": 0.2598, + "objective": 0.26661187410354614, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.574999988079071, + "regularize": 0.26661187410354614, + "step": 160 + }, + { + "dpo_loss": 0.6094165444374084, + "epoch": 0.9352857817666509, + "grad_norm": 13.97029915603188, + "learning_rate": 9.768584753741134e-07, + "logits": -1.5288485288619995, + "logps": -89.78202819824219, + "loss": 0.2722, + "objective": 0.2768023908138275, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.5833333134651184, + "regularize": 0.2768023908138275, + "step": 165 + }, + { + "dpo_loss": 0.5933364033699036, + "epoch": 0.9636277751535192, + "grad_norm": 16.22461169876271, + "learning_rate": 9.737828933872073e-07, + "logits": -1.5362460613250732, + "logps": -89.80062866210938, + "loss": 0.2637, + "objective": 0.25175875425338745, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6000000238418579, + "regularize": 0.25175875425338745, + "step": 170 + }, + { + "dpo_loss": 0.5974230170249939, + "epoch": 0.9919697685403873, + "grad_norm": 15.239246649578357, + "learning_rate": 9.705209506155634e-07, + "logits": -1.4934788942337036, + "logps": -88.75602722167969, + "loss": 0.2634, + "objective": 0.24328327178955078, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6583333611488342, + "regularize": 0.24328327178955078, + "step": 175 + }, + { + "dpo_loss": 0.581199049949646, + "epoch": 1.0203117619272555, + "grad_norm": 14.774253000474259, + "learning_rate": 9.670739301325534e-07, + "logits": -1.6717146635055542, + "logps": -88.1180191040039, + "loss": 0.2291, + "objective": 0.2292441576719284, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6208333373069763, + "regularize": 0.2292441576719284, + "step": 180 + }, + { + "dpo_loss": 0.575748860836029, + "epoch": 1.0486537553141237, + "grad_norm": 17.006560148175247, + "learning_rate": 9.63443187811197e-07, + "logits": -1.5418639183044434, + "logps": -88.76116180419922, + "loss": 0.2333, + "objective": 0.2448757141828537, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.6083333492279053, + "regularize": 0.2448757141828537, + "step": 185 + }, + { + "dpo_loss": 0.5784227848052979, + "epoch": 1.076995748700992, + "grad_norm": 15.70285096364104, + "learning_rate": 9.596301517908328e-07, + "logits": -1.5934358835220337, + "logps": -89.71295928955078, + "loss": 0.2135, + "objective": 0.19769170880317688, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.625, + "regularize": 0.19769169390201569, + "step": 190 + }, + { + "dpo_loss": 0.5825232267379761, + "epoch": 1.10533774208786, + "grad_norm": 14.396192466103011, + "learning_rate": 9.556363219153662e-07, + "logits": -1.5657871961593628, + "logps": -87.62194061279297, + "loss": 0.2125, + "objective": 0.1948522925376892, + "ranking_idealized": 0.7041666507720947, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.6333333253860474, + "regularize": 0.1948522925376892, + "step": 195 + }, + { + "dpo_loss": 0.5780203342437744, + "epoch": 1.1336797354747283, + "grad_norm": 14.49314507260273, + "learning_rate": 9.514632691433106e-07, + "logits": -1.6607592105865479, + "logps": -87.46318817138672, + "loss": 0.2075, + "objective": 0.20799687504768372, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6000000238418579, + "regularize": 0.20799687504768372, + "step": 200 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.6855701804161072, + "eval_logits": -1.6490410566329956, + "eval_logps": -95.76744079589844, + "eval_loss": 0.408469021320343, + "eval_objective": 0.41199764609336853, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5175619721412659, + "eval_regularize": 0.41199764609336853, + "eval_runtime": 260.0694, + "eval_samples_per_second": 22.263, + "eval_steps_per_second": 0.931, + "step": 200 + }, + { + "dpo_loss": 0.5706655979156494, + "epoch": 1.1620217288615966, + "grad_norm": 15.635885064412406, + "learning_rate": 9.471126349298556e-07, + "logits": -1.6521145105361938, + "logps": -88.61875915527344, + "loss": 0.2091, + "objective": 0.2020285278558731, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.5916666388511658, + "regularize": 0.2020285278558731, + "step": 205 + }, + { + "dpo_loss": 0.5730646252632141, + "epoch": 1.1903637222484649, + "grad_norm": 14.844650272500662, + "learning_rate": 9.425861305812081e-07, + "logits": -1.6272530555725098, + "logps": -87.98621368408203, + "loss": 0.208, + "objective": 0.2310691922903061, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.5958333611488342, + "regularize": 0.2310691922903061, + "step": 210 + }, + { + "dpo_loss": 0.5831784009933472, + "epoch": 1.2187057156353331, + "grad_norm": 14.235853311462472, + "learning_rate": 9.378855365814557e-07, + "logits": -1.5842416286468506, + "logps": -88.42698669433594, + "loss": 0.1917, + "objective": 0.20643554627895355, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6000000238418579, + "regularize": 0.20643554627895355, + "step": 215 + }, + { + "dpo_loss": 0.574702799320221, + "epoch": 1.2470477090222012, + "grad_norm": 15.09350903860739, + "learning_rate": 9.330127018922193e-07, + "logits": -1.5366528034210205, + "logps": -89.38643646240234, + "loss": 0.1998, + "objective": 0.18881799280643463, + "ranking_idealized": 0.7208333611488342, + "ranking_idealized_expo": 0.6041666865348816, + "ranking_simple": 0.6791666746139526, + "regularize": 0.18881799280643463, + "step": 220 + }, + { + "dpo_loss": 0.5676099061965942, + "epoch": 1.2753897024090695, + "grad_norm": 14.088714346139406, + "learning_rate": 9.279695432253708e-07, + "logits": -1.6117132902145386, + "logps": -90.31532287597656, + "loss": 0.2023, + "objective": 0.2032053917646408, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6458333134651184, + "regularize": 0.2032053917646408, + "step": 225 + }, + { + "dpo_loss": 0.5804415941238403, + "epoch": 1.3037316957959377, + "grad_norm": 14.005106959698436, + "learning_rate": 9.227580442891021e-07, + "logits": -1.5858609676361084, + "logps": -89.74004364013672, + "loss": 0.1904, + "objective": 0.188642218708992, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5916666388511658, + "regularize": 0.1886422038078308, + "step": 230 + }, + { + "dpo_loss": 0.5623538494110107, + "epoch": 1.3320736891828058, + "grad_norm": 15.259842003072114, + "learning_rate": 9.173802550076401e-07, + "logits": -1.6947582960128784, + "logps": -88.09205627441406, + "loss": 0.1949, + "objective": 0.19328099489212036, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6333333253860474, + "regularize": 0.19328099489212036, + "step": 235 + }, + { + "dpo_loss": 0.5668548345565796, + "epoch": 1.360415682569674, + "grad_norm": 14.478002369411158, + "learning_rate": 9.118382907149163e-07, + "logits": -1.6436070203781128, + "logps": -89.80839538574219, + "loss": 0.1801, + "objective": 0.1796088069677353, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.574999988079071, + "regularize": 0.1796088069677353, + "step": 240 + }, + { + "dpo_loss": 0.5793067812919617, + "epoch": 1.3887576759565423, + "grad_norm": 15.141311134036634, + "learning_rate": 9.061343313225087e-07, + "logits": -1.6800334453582764, + "logps": -89.72946166992188, + "loss": 0.19, + "objective": 0.1958913505077362, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6083333492279053, + "regularize": 0.1958913505077362, + "step": 245 + }, + { + "dpo_loss": 0.5501002669334412, + "epoch": 1.4170996693434104, + "grad_norm": 14.176924356730293, + "learning_rate": 9.002706204621802e-07, + "logits": -1.6303541660308838, + "logps": -88.4422836303711, + "loss": 0.1852, + "objective": 0.16911663115024567, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.6333333253860474, + "regularize": 0.16911663115024567, + "step": 250 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6845212578773499, + "eval_logits": -1.697658896446228, + "eval_logps": -95.10143280029297, + "eval_loss": 0.40446704626083374, + "eval_objective": 0.40795382857322693, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5227272510528564, + "eval_regularize": 0.40795382857322693, + "eval_runtime": 259.673, + "eval_samples_per_second": 22.297, + "eval_steps_per_second": 0.932, + "step": 250 + }, + { + "dpo_loss": 0.5670092701911926, + "epoch": 1.4454416627302786, + "grad_norm": 14.305797765290281, + "learning_rate": 8.942494646033554e-07, + "logits": -1.6137691736221313, + "logps": -88.44405364990234, + "loss": 0.1907, + "objective": 0.18229366838932037, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6416666507720947, + "regularize": 0.18229366838932037, + "step": 255 + }, + { + "dpo_loss": 0.5675135254859924, + "epoch": 1.473783656117147, + "grad_norm": 15.380169031973479, + "learning_rate": 8.880732321458784e-07, + "logits": -1.6693089008331299, + "logps": -89.93167877197266, + "loss": 0.1819, + "objective": 0.19000987708568573, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.574999988079071, + "regularize": 0.19000987708568573, + "step": 260 + }, + { + "dpo_loss": 0.5571501851081848, + "epoch": 1.5021256495040152, + "grad_norm": 15.264528602169092, + "learning_rate": 8.817443524884117e-07, + "logits": -1.6189838647842407, + "logps": -87.48661804199219, + "loss": 0.1865, + "objective": 0.19823399186134338, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6208333373069763, + "regularize": 0.19823399186134338, + "step": 265 + }, + { + "dpo_loss": 0.5573412775993347, + "epoch": 1.5304676428908834, + "grad_norm": 14.915226583029336, + "learning_rate": 8.752653150728411e-07, + "logits": -1.6289520263671875, + "logps": -89.79716491699219, + "loss": 0.1811, + "objective": 0.18896053731441498, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.612500011920929, + "regularize": 0.18896053731441498, + "step": 270 + }, + { + "dpo_loss": 0.5617662668228149, + "epoch": 1.5588096362777515, + "grad_norm": 14.461835236701942, + "learning_rate": 8.68638668405062e-07, + "logits": -1.6396280527114868, + "logps": -90.90117645263672, + "loss": 0.1786, + "objective": 0.16418127715587616, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6499999761581421, + "regularize": 0.16418126225471497, + "step": 275 + }, + { + "dpo_loss": 0.5430324077606201, + "epoch": 1.5871516296646198, + "grad_norm": 14.442514240271057, + "learning_rate": 8.61867019052535e-07, + "logits": -1.5382746458053589, + "logps": -88.97178649902344, + "loss": 0.1774, + "objective": 0.17928847670555115, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6416666507720947, + "regularize": 0.17928847670555115, + "step": 280 + }, + { + "dpo_loss": 0.5505569577217102, + "epoch": 1.615493623051488, + "grad_norm": 16.245610088733493, + "learning_rate": 8.549530306190014e-07, + "logits": -1.6338722705841064, + "logps": -89.0966796875, + "loss": 0.1683, + "objective": 0.16002054512500763, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6333333253860474, + "regularize": 0.16002054512500763, + "step": 285 + }, + { + "dpo_loss": 0.5579850673675537, + "epoch": 1.643835616438356, + "grad_norm": 15.414341809456326, + "learning_rate": 8.478994226967638e-07, + "logits": -1.6751606464385986, + "logps": -88.0530776977539, + "loss": 0.1719, + "objective": 0.1747826188802719, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6541666388511658, + "regularize": 0.17478260397911072, + "step": 290 + }, + { + "dpo_loss": 0.5509874224662781, + "epoch": 1.6721776098252243, + "grad_norm": 13.923449942826227, + "learning_rate": 8.407089697969456e-07, + "logits": -1.5533292293548584, + "logps": -87.6349868774414, + "loss": 0.1765, + "objective": 0.17216673493385315, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.6166666746139526, + "regularize": 0.17216673493385315, + "step": 295 + }, + { + "dpo_loss": 0.5574812293052673, + "epoch": 1.7005196032120926, + "grad_norm": 15.451129216726908, + "learning_rate": 8.333845002581458e-07, + "logits": -1.6305700540542603, + "logps": -88.86536407470703, + "loss": 0.172, + "objective": 0.19959108531475067, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.6625000238418579, + "regularize": 0.19959108531475067, + "step": 300 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6843295097351074, + "eval_logits": -1.640322208404541, + "eval_logps": -95.94416809082031, + "eval_loss": 0.4054754078388214, + "eval_objective": 0.4098014831542969, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5227272510528564, + "eval_regularize": 0.4098014831542969, + "eval_runtime": 260.124, + "eval_samples_per_second": 22.259, + "eval_steps_per_second": 0.93, + "step": 300 + }, + { + "dpo_loss": 0.5613775849342346, + "epoch": 1.7288615965989607, + "grad_norm": 14.905752064782224, + "learning_rate": 8.259288951339232e-07, + "logits": -1.60408616065979, + "logps": -89.15306091308594, + "loss": 0.1652, + "objective": 0.1750606745481491, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.612500011920929, + "regularize": 0.1750606745481491, + "step": 305 + }, + { + "dpo_loss": 0.5524806380271912, + "epoch": 1.7572035899858292, + "grad_norm": 15.166116258201686, + "learning_rate": 8.183450870595441e-07, + "logits": -1.6861586570739746, + "logps": -87.07334899902344, + "loss": 0.1671, + "objective": 0.1604899913072586, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6416666507720947, + "regularize": 0.1604899913072586, + "step": 310 + }, + { + "dpo_loss": 0.5647029280662537, + "epoch": 1.7855455833726972, + "grad_norm": 14.218932923228996, + "learning_rate": 8.106360590984404e-07, + "logits": -1.6006724834442139, + "logps": -87.93310546875, + "loss": 0.165, + "objective": 0.16339111328125, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.612500011920929, + "regularize": 0.1633910983800888, + "step": 315 + }, + { + "dpo_loss": 0.5607351064682007, + "epoch": 1.8138875767595655, + "grad_norm": 14.843987076974228, + "learning_rate": 8.028048435688333e-07, + "logits": -1.6807405948638916, + "logps": -90.42388153076172, + "loss": 0.1542, + "objective": 0.15479125082492828, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6333333253860474, + "regularize": 0.15479125082492828, + "step": 320 + }, + { + "dpo_loss": 0.5438184142112732, + "epoch": 1.8422295701464337, + "grad_norm": 15.354095003208785, + "learning_rate": 7.948545208509811e-07, + "logits": -1.7393078804016113, + "logps": -92.29595184326172, + "loss": 0.1626, + "objective": 0.15665055811405182, + "ranking_idealized": 0.7166666388511658, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6791666746139526, + "regularize": 0.15665055811405182, + "step": 325 + }, + { + "dpo_loss": 0.5477765798568726, + "epoch": 1.8705715635333018, + "grad_norm": 14.393471500808245, + "learning_rate": 7.86788218175523e-07, + "logits": -1.4602495431900024, + "logps": -89.03805541992188, + "loss": 0.1588, + "objective": 0.15682725608348846, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.6166666746139526, + "regularize": 0.15682725608348846, + "step": 330 + }, + { + "dpo_loss": 0.562004029750824, + "epoch": 1.89891355692017, + "grad_norm": 14.386579659864832, + "learning_rate": 7.786091083933949e-07, + "logits": -1.6249334812164307, + "logps": -88.8255844116211, + "loss": 0.1531, + "objective": 0.14314964413642883, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.612500011920929, + "regularize": 0.14314964413642883, + "step": 335 + }, + { + "dpo_loss": 0.5628435015678406, + "epoch": 1.9272555503070383, + "grad_norm": 14.464107878774616, + "learning_rate": 7.703204087277988e-07, + "logits": -1.6546257734298706, + "logps": -90.56217956542969, + "loss": 0.1527, + "objective": 0.1476096212863922, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6666666865348816, + "regularize": 0.1476096212863922, + "step": 340 + }, + { + "dpo_loss": 0.5599310994148254, + "epoch": 1.9555975436939064, + "grad_norm": 14.799147392008404, + "learning_rate": 7.619253795087208e-07, + "logits": -1.647698998451233, + "logps": -89.57904815673828, + "loss": 0.1417, + "objective": 0.13018347322940826, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6166666746139526, + "regularize": 0.13018347322940826, + "step": 345 + }, + { + "dpo_loss": 0.5558927655220032, + "epoch": 1.9839395370807746, + "grad_norm": 14.874212978278477, + "learning_rate": 7.534273228904915e-07, + "logits": -1.5429824590682983, + "logps": -90.79412078857422, + "loss": 0.1504, + "objective": 0.14253783226013184, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.6000000238418579, + "regularize": 0.14253783226013184, + "step": 350 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6839829683303833, + "eval_logits": -1.6734960079193115, + "eval_logps": -96.38378143310547, + "eval_loss": 0.4065950810909271, + "eval_objective": 0.40939342975616455, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.51962810754776, + "eval_regularize": 0.40939342975616455, + "eval_runtime": 259.5874, + "eval_samples_per_second": 22.305, + "eval_steps_per_second": 0.932, + "step": 350 + }, + { + "dpo_loss": 0.5446032285690308, + "epoch": 2.012281530467643, + "grad_norm": 14.92352390433015, + "learning_rate": 7.448295815528956e-07, + "logits": -1.5243901014328003, + "logps": -89.14817810058594, + "loss": 0.1469, + "objective": 0.13605408370494843, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6708333492279053, + "regularize": 0.13605406880378723, + "step": 355 + }, + { + "dpo_loss": 0.5358213186264038, + "epoch": 2.040623523854511, + "grad_norm": 17.820965696851154, + "learning_rate": 7.361355373863413e-07, + "logits": -1.6093335151672363, + "logps": -88.62029266357422, + "loss": 0.1269, + "objective": 0.13526426255702972, + "ranking_idealized": 0.7250000238418579, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.699999988079071, + "regularize": 0.13526426255702972, + "step": 360 + }, + { + "dpo_loss": 0.5384168028831482, + "epoch": 2.0689655172413794, + "grad_norm": 16.00523341015884, + "learning_rate": 7.273486101616056e-07, + "logits": -1.6718920469284058, + "logps": -90.12538146972656, + "loss": 0.1321, + "objective": 0.12407148629426956, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6708333492279053, + "regularize": 0.12407147884368896, + "step": 365 + }, + { + "dpo_loss": 0.5496992468833923, + "epoch": 2.0973075106282475, + "grad_norm": 14.939079070952808, + "learning_rate": 7.184722561846797e-07, + "logits": -1.6580688953399658, + "logps": -87.11526489257812, + "loss": 0.1168, + "objective": 0.12028197199106216, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.6208333373069763, + "regularize": 0.12028196454048157, + "step": 370 + }, + { + "dpo_loss": 0.5366081595420837, + "epoch": 2.1256495040151155, + "grad_norm": 14.800979136941399, + "learning_rate": 7.095099669372443e-07, + "logits": -1.6352336406707764, + "logps": -89.77750396728516, + "loss": 0.1247, + "objective": 0.11821580678224564, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.7083333134651184, + "regularize": 0.11821580678224564, + "step": 375 + }, + { + "dpo_loss": 0.5499475002288818, + "epoch": 2.153991497401984, + "grad_norm": 13.76934472823536, + "learning_rate": 7.004652677033068e-07, + "logits": -1.5623695850372314, + "logps": -88.50102996826172, + "loss": 0.1232, + "objective": 0.10588161647319794, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6291666626930237, + "regularize": 0.10588161647319794, + "step": 380 + }, + { + "dpo_loss": 0.5542941689491272, + "epoch": 2.182333490788852, + "grad_norm": 14.633555067363714, + "learning_rate": 6.913417161825449e-07, + "logits": -1.6838804483413696, + "logps": -89.8261947631836, + "loss": 0.1154, + "objective": 0.09328292310237885, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6416666507720947, + "regularize": 0.09328292310237885, + "step": 385 + }, + { + "dpo_loss": 0.5482229590415955, + "epoch": 2.21067548417572, + "grad_norm": 15.478260370876582, + "learning_rate": 6.821429010908971e-07, + "logits": -1.559071660041809, + "logps": -89.70718383789062, + "loss": 0.121, + "objective": 0.11915615200996399, + "ranking_idealized": 0.6041666865348816, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.5958333611488342, + "regularize": 0.1191561371088028, + "step": 390 + }, + { + "dpo_loss": 0.5234766006469727, + "epoch": 2.2390174775625886, + "grad_norm": 14.815951181180777, + "learning_rate": 6.728724407489553e-07, + "logits": -1.5655180215835571, + "logps": -90.58793640136719, + "loss": 0.1231, + "objective": 0.11187195777893066, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6833333373069763, + "regularize": 0.11187195777893066, + "step": 395 + }, + { + "dpo_loss": 0.5324122309684753, + "epoch": 2.2673594709494567, + "grad_norm": 15.066758948045832, + "learning_rate": 6.635339816587108e-07, + "logits": -1.6402709484100342, + "logps": -90.86196899414062, + "loss": 0.1241, + "objective": 0.12945452332496643, + "ranking_idealized": 0.7583333253860474, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.7208333611488342, + "regularize": 0.12945450842380524, + "step": 400 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6844344735145569, + "eval_logits": -1.6892694234848022, + "eval_logps": -95.98338317871094, + "eval_loss": 0.40758705139160156, + "eval_objective": 0.411220520734787, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5237603187561035, + "eval_regularize": 0.411220520734787, + "eval_runtime": 261.5788, + "eval_samples_per_second": 22.135, + "eval_steps_per_second": 0.925, + "step": 400 + }, + { + "dpo_loss": 0.5428405404090881, + "epoch": 2.295701464336325, + "grad_norm": 15.346220951371636, + "learning_rate": 6.541311970692162e-07, + "logits": -1.7154464721679688, + "logps": -88.2754135131836, + "loss": 0.1221, + "objective": 0.13386504352092743, + "ranking_idealized": 0.737500011920929, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6666666865348816, + "regularize": 0.13386504352092743, + "step": 405 + }, + { + "dpo_loss": 0.5366548895835876, + "epoch": 2.324043457723193, + "grad_norm": 15.316836223077656, + "learning_rate": 6.446677855317264e-07, + "logits": -1.5751118659973145, + "logps": -88.22239685058594, + "loss": 0.1133, + "objective": 0.10883895307779312, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.625, + "regularize": 0.10883894562721252, + "step": 410 + }, + { + "dpo_loss": 0.5400758385658264, + "epoch": 2.3523854511100613, + "grad_norm": 15.140337757347018, + "learning_rate": 6.351474694448864e-07, + "logits": -1.6428126096725464, + "logps": -88.48053741455078, + "loss": 0.1166, + "objective": 0.1268073171377182, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.637499988079071, + "regularize": 0.1268073171377182, + "step": 415 + }, + { + "dpo_loss": 0.5541034936904907, + "epoch": 2.3807274444969297, + "grad_norm": 14.807008378400798, + "learning_rate": 6.255739935905395e-07, + "logits": -1.5306849479675293, + "logps": -90.66093444824219, + "loss": 0.1148, + "objective": 0.11297421902418137, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.612500011920929, + "regularize": 0.11297421902418137, + "step": 420 + }, + { + "dpo_loss": 0.5390843152999878, + "epoch": 2.409069437883798, + "grad_norm": 14.236861276355002, + "learning_rate": 6.159511236607315e-07, + "logits": -1.5861257314682007, + "logps": -88.8576431274414, + "loss": 0.1119, + "objective": 0.112046018242836, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.637499988079071, + "regularize": 0.1120460107922554, + "step": 425 + }, + { + "dpo_loss": 0.5440212488174438, + "epoch": 2.4374114312706663, + "grad_norm": 14.636334382365337, + "learning_rate": 6.062826447764883e-07, + "logits": -1.6188839673995972, + "logps": -90.33170318603516, + "loss": 0.1066, + "objective": 0.09743823856115341, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6333333253860474, + "regularize": 0.09743823856115341, + "step": 430 + }, + { + "dpo_loss": 0.5423538088798523, + "epoch": 2.4657534246575343, + "grad_norm": 14.885310195485024, + "learning_rate": 5.965723599989528e-07, + "logits": -1.6494262218475342, + "logps": -90.86329650878906, + "loss": 0.1193, + "objective": 0.1321849673986435, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6291666626930237, + "regularize": 0.1321849673986435, + "step": 435 + }, + { + "dpo_loss": 0.5417699813842773, + "epoch": 2.4940954180444024, + "grad_norm": 15.13405575560551, + "learning_rate": 5.868240888334652e-07, + "logits": -1.5878384113311768, + "logps": -89.1788330078125, + "loss": 0.1086, + "objective": 0.10985619574785233, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6041666865348816, + "regularize": 0.10985619574785233, + "step": 440 + }, + { + "dpo_loss": 0.5431756377220154, + "epoch": 2.5224374114312704, + "grad_norm": 14.711454767729204, + "learning_rate": 5.770416657271728e-07, + "logits": -1.616743803024292, + "logps": -86.75231170654297, + "loss": 0.1097, + "objective": 0.11407394707202911, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6833333373069763, + "regularize": 0.11407394707202911, + "step": 445 + }, + { + "dpo_loss": 0.5639354586601257, + "epoch": 2.550779404818139, + "grad_norm": 13.941250536582869, + "learning_rate": 5.67228938560766e-07, + "logits": -1.6488417387008667, + "logps": -89.29744720458984, + "loss": 0.1083, + "objective": 0.1078432947397232, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6333333253860474, + "regularize": 0.1078432947397232, + "step": 450 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.6838362216949463, + "eval_logits": -1.681365728378296, + "eval_logps": -96.42753601074219, + "eval_loss": 0.40609824657440186, + "eval_objective": 0.40940061211586, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.51962810754776, + "eval_regularize": 0.40940061211586, + "eval_runtime": 259.9201, + "eval_samples_per_second": 22.276, + "eval_steps_per_second": 0.931, + "step": 450 + }, + { + "dpo_loss": 0.5353375673294067, + "epoch": 2.579121398205007, + "grad_norm": 14.879466038094796, + "learning_rate": 5.573897671349268e-07, + "logits": -1.6768704652786255, + "logps": -89.45830535888672, + "loss": 0.1065, + "objective": 0.11618896573781967, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6666666865348816, + "regularize": 0.11618896573781967, + "step": 455 + }, + { + "dpo_loss": 0.5305708646774292, + "epoch": 2.6074633915918755, + "grad_norm": 14.078420242787786, + "learning_rate": 5.475280216520912e-07, + "logits": -1.7477765083312988, + "logps": -89.77506256103516, + "loss": 0.1089, + "objective": 0.1040702536702156, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6458333134651184, + "regularize": 0.10407023876905441, + "step": 460 + }, + { + "dpo_loss": 0.5392042994499207, + "epoch": 2.6358053849787435, + "grad_norm": 14.620942850653412, + "learning_rate": 5.376475811941191e-07, + "logits": -1.6256554126739502, + "logps": -89.18009185791016, + "loss": 0.1057, + "objective": 0.11545146256685257, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6000000238418579, + "regularize": 0.11545146256685257, + "step": 465 + }, + { + "dpo_loss": 0.5424565076828003, + "epoch": 2.6641473783656116, + "grad_norm": 15.533051216052591, + "learning_rate": 5.277523321964701e-07, + "logits": -1.6505295038223267, + "logps": -88.97268676757812, + "loss": 0.102, + "objective": 0.11157210916280746, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.637499988079071, + "regularize": 0.11157210916280746, + "step": 470 + }, + { + "dpo_loss": 0.5482778549194336, + "epoch": 2.69248937175248, + "grad_norm": 14.564992109935103, + "learning_rate": 5.178461669194903e-07, + "logits": -1.5990585088729858, + "logps": -87.48114776611328, + "loss": 0.1038, + "objective": 0.10224457085132599, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6499999761581421, + "regularize": 0.10224457085132599, + "step": 475 + }, + { + "dpo_loss": 0.5498027205467224, + "epoch": 2.720831365139348, + "grad_norm": 15.14165653036781, + "learning_rate": 5.07932981917404e-07, + "logits": -1.686042070388794, + "logps": -86.90522766113281, + "loss": 0.1017, + "objective": 0.10218793898820877, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6291666626930237, + "regularize": 0.10218793898820877, + "step": 480 + }, + { + "dpo_loss": 0.5342879891395569, + "epoch": 2.7491733585262166, + "grad_norm": 14.745717143899025, + "learning_rate": 4.980166765056193e-07, + "logits": -1.592150330543518, + "logps": -89.95211791992188, + "loss": 0.1028, + "objective": 0.09829958528280258, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6875, + "regularize": 0.09829958528280258, + "step": 485 + }, + { + "dpo_loss": 0.5277873873710632, + "epoch": 2.7775153519130846, + "grad_norm": 15.397799376978266, + "learning_rate": 4.881011512269463e-07, + "logits": -1.6200255155563354, + "logps": -87.80931091308594, + "loss": 0.1078, + "objective": 0.11417750269174576, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.4749999940395355, + "ranking_simple": 0.6541666388511658, + "regularize": 0.11417750269174576, + "step": 490 + }, + { + "dpo_loss": 0.5365422368049622, + "epoch": 2.8058573452999527, + "grad_norm": 14.75494539326366, + "learning_rate": 4.78190306317332e-07, + "logits": -1.631138563156128, + "logps": -87.2429428100586, + "loss": 0.0967, + "objective": 0.0883258655667305, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.625, + "regularize": 0.0883258506655693, + "step": 495 + }, + { + "dpo_loss": 0.5285528302192688, + "epoch": 2.8341993386868207, + "grad_norm": 14.831004301691246, + "learning_rate": 4.682880401717177e-07, + "logits": -1.6615116596221924, + "logps": -86.21328735351562, + "loss": 0.0989, + "objective": 0.0958656519651413, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6708333492279053, + "regularize": 0.0958656445145607, + "step": 500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.6844313144683838, + "eval_logits": -1.6797000169754028, + "eval_logps": -95.7645034790039, + "eval_loss": 0.4076074957847595, + "eval_objective": 0.4115086793899536, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5175619721412659, + "eval_regularize": 0.4115086793899536, + "eval_runtime": 263.4539, + "eval_samples_per_second": 21.977, + "eval_steps_per_second": 0.919, + "step": 500 + }, + { + "dpo_loss": 0.5365604758262634, + "epoch": 2.862541332073689, + "grad_norm": 15.368802903715196, + "learning_rate": 4.5839824781061886e-07, + "logits": -1.5910155773162842, + "logps": -88.20460510253906, + "loss": 0.0982, + "objective": 0.10264171659946442, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6708333492279053, + "regularize": 0.10264171659946442, + "step": 505 + }, + { + "dpo_loss": 0.5401506423950195, + "epoch": 2.8908833254605573, + "grad_norm": 14.750995001535633, + "learning_rate": 4.4852481934803277e-07, + "logits": -1.546883463859558, + "logps": -88.82188415527344, + "loss": 0.0938, + "objective": 0.09417784959077835, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6499999761581421, + "regularize": 0.09417784959077835, + "step": 510 + }, + { + "dpo_loss": 0.5448324084281921, + "epoch": 2.9192253188474258, + "grad_norm": 14.256889048024082, + "learning_rate": 4.3867163846127674e-07, + "logits": -1.7348732948303223, + "logps": -88.37023162841797, + "loss": 0.0959, + "objective": 0.09633953124284744, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.6541666388511658, + "regularize": 0.09633953124284744, + "step": 515 + }, + { + "dpo_loss": 0.5417373776435852, + "epoch": 2.947567312234294, + "grad_norm": 15.293723935135468, + "learning_rate": 4.2884258086335745e-07, + "logits": -1.6043357849121094, + "logps": -90.03458404541016, + "loss": 0.0903, + "objective": 0.08684458583593369, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6416666507720947, + "regularize": 0.08684458583593369, + "step": 520 + }, + { + "dpo_loss": 0.5415874719619751, + "epoch": 2.975909305621162, + "grad_norm": 14.348085330611658, + "learning_rate": 4.1904151277847305e-07, + "logits": -1.616308569908142, + "logps": -89.7189712524414, + "loss": 0.0923, + "objective": 0.10915852338075638, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.675000011920929, + "regularize": 0.10915852338075638, + "step": 525 + }, + { + "dpo_loss": 0.5288244485855103, + "epoch": 3.0042512990080303, + "grad_norm": 14.702772615303738, + "learning_rate": 4.092722894212487e-07, + "logits": -1.6195777654647827, + "logps": -89.2164535522461, + "loss": 0.0961, + "objective": 0.10630013048648834, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6541666388511658, + "regularize": 0.10630011558532715, + "step": 530 + }, + { + "dpo_loss": 0.5415112376213074, + "epoch": 3.0325932923948984, + "grad_norm": 14.653134844595096, + "learning_rate": 3.995387534803005e-07, + "logits": -1.6762583255767822, + "logps": -90.93753051757812, + "loss": 0.0853, + "objective": 0.09499659389257431, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6625000238418579, + "regularize": 0.09499659389257431, + "step": 535 + }, + { + "dpo_loss": 0.5392681956291199, + "epoch": 3.0609352857817664, + "grad_norm": 15.143937078082786, + "learning_rate": 3.8984473360672967e-07, + "logits": -1.7662190198898315, + "logps": -88.38541412353516, + "loss": 0.0781, + "objective": 0.06802941113710403, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6458333134651184, + "regularize": 0.06802940368652344, + "step": 540 + }, + { + "dpo_loss": 0.5409398674964905, + "epoch": 3.089277279168635, + "grad_norm": 14.949184489866422, + "learning_rate": 3.801940429081345e-07, + "logits": -1.6874436140060425, + "logps": -89.9101791381836, + "loss": 0.0793, + "objective": 0.08502420783042908, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.612500011920929, + "regularize": 0.08502420783042908, + "step": 545 + }, + { + "dpo_loss": 0.530200719833374, + "epoch": 3.117619272555503, + "grad_norm": 15.128964804198388, + "learning_rate": 3.7059047744873955e-07, + "logits": -1.5058013200759888, + "logps": -89.7841567993164, + "loss": 0.0857, + "objective": 0.08006270974874496, + "ranking_idealized": 0.699999988079071, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.699999988079071, + "regularize": 0.08006270974874496, + "step": 550 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 0.6840906143188477, + "eval_logits": -1.6864055395126343, + "eval_logps": -96.7056655883789, + "eval_loss": 0.40704044699668884, + "eval_objective": 0.41078415513038635, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.51962810754776, + "eval_regularize": 0.41078415513038635, + "eval_runtime": 259.6327, + "eval_samples_per_second": 22.301, + "eval_steps_per_second": 0.932, + "step": 550 + }, + { + "dpo_loss": 0.5227282047271729, + "epoch": 3.1459612659423715, + "grad_norm": 15.414705329848626, + "learning_rate": 3.6103781475622786e-07, + "logits": -1.6148103475570679, + "logps": -90.73031616210938, + "loss": 0.0822, + "objective": 0.08249496668577194, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.675000011920929, + "regularize": 0.08249495923519135, + "step": 555 + }, + { + "dpo_loss": 0.531684935092926, + "epoch": 3.1743032593292395, + "grad_norm": 15.644698799260503, + "learning_rate": 3.5153981233586274e-07, + "logits": -1.5928341150283813, + "logps": -88.10628509521484, + "loss": 0.0755, + "objective": 0.0785578116774559, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6541666388511658, + "regularize": 0.07855779677629471, + "step": 560 + }, + { + "dpo_loss": 0.5370542407035828, + "epoch": 3.2026452527161076, + "grad_norm": 16.61476983622863, + "learning_rate": 3.421002061924876e-07, + "logits": -1.6470757722854614, + "logps": -88.91401672363281, + "loss": 0.078, + "objective": 0.08550135046243668, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6666666865348816, + "regularize": 0.08550134301185608, + "step": 565 + }, + { + "dpo_loss": 0.5315267443656921, + "epoch": 3.230987246102976, + "grad_norm": 16.219608431973352, + "learning_rate": 3.327227093609824e-07, + "logits": -1.5615371465682983, + "logps": -88.98600006103516, + "loss": 0.0804, + "objective": 0.08551181107759476, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.6208333373069763, + "regularize": 0.08551178872585297, + "step": 570 + }, + { + "dpo_loss": 0.5380645990371704, + "epoch": 3.259329239489844, + "grad_norm": 15.599509722839054, + "learning_rate": 3.234110104457536e-07, + "logits": -1.65503990650177, + "logps": -89.4914321899414, + "loss": 0.0761, + "objective": 0.08411499112844467, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6708333492279053, + "regularize": 0.08411499112844467, + "step": 575 + }, + { + "dpo_loss": 0.5447063446044922, + "epoch": 3.287671232876712, + "grad_norm": 14.709745903577328, + "learning_rate": 3.141687721698363e-07, + "logits": -1.6440101861953735, + "logps": -90.87422943115234, + "loss": 0.0766, + "objective": 0.06781290471553802, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6499999761581421, + "regularize": 0.06781289726495743, + "step": 580 + }, + { + "dpo_loss": 0.544808566570282, + "epoch": 3.3160132262635806, + "grad_norm": 14.95898016236234, + "learning_rate": 3.049996299341742e-07, + "logits": -1.67787766456604, + "logps": -90.5733871459961, + "loss": 0.077, + "objective": 0.07407450675964355, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6791666746139526, + "regularize": 0.07407449930906296, + "step": 585 + }, + { + "dpo_loss": 0.5435771346092224, + "epoch": 3.3443552196504487, + "grad_norm": 16.129826241390184, + "learning_rate": 2.959071903876486e-07, + "logits": -1.689025640487671, + "logps": -90.51628875732422, + "loss": 0.0756, + "objective": 0.060641638934612274, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6541666388511658, + "regularize": 0.06064163148403168, + "step": 590 + }, + { + "dpo_loss": 0.5342642664909363, + "epoch": 3.372697213037317, + "grad_norm": 15.00734617596817, + "learning_rate": 2.86895030008416e-07, + "logits": -1.6080927848815918, + "logps": -89.73548126220703, + "loss": 0.0743, + "objective": 0.07631801813840866, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6416666507720947, + "regularize": 0.07631801068782806, + "step": 595 + }, + { + "dpo_loss": 0.517924964427948, + "epoch": 3.4010392064241852, + "grad_norm": 14.448635430187423, + "learning_rate": 2.779666936971129e-07, + "logits": -1.6234545707702637, + "logps": -89.56751251220703, + "loss": 0.0723, + "objective": 0.07954961806535721, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6625000238418579, + "regularize": 0.07954961806535721, + "step": 600 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 0.6840184330940247, + "eval_logits": -1.6933923959732056, + "eval_logps": -96.77139282226562, + "eval_loss": 0.4082699120044708, + "eval_objective": 0.41121208667755127, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5227272510528564, + "eval_regularize": 0.41121208667755127, + "eval_runtime": 261.2463, + "eval_samples_per_second": 22.163, + "eval_steps_per_second": 0.926, + "step": 600 + }, + { + "dpo_loss": 0.544577956199646, + "epoch": 3.4293811998110533, + "grad_norm": 15.344467617289204, + "learning_rate": 2.6912569338248315e-07, + "logits": -1.677703857421875, + "logps": -91.40158081054688, + "loss": 0.0712, + "objective": 0.07372996211051941, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6541666388511658, + "regularize": 0.07372996211051941, + "step": 605 + }, + { + "dpo_loss": 0.5388127565383911, + "epoch": 3.4577231931979218, + "grad_norm": 14.865347998745246, + "learning_rate": 2.603755066399718e-07, + "logits": -1.6282384395599365, + "logps": -89.45064544677734, + "loss": 0.0716, + "objective": 0.06525908410549164, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6541666388511658, + "regularize": 0.06525907665491104, + "step": 610 + }, + { + "dpo_loss": 0.5260815620422363, + "epoch": 3.48606518658479, + "grad_norm": 13.789444853566813, + "learning_rate": 2.517195753238345e-07, + "logits": -1.6580389738082886, + "logps": -90.04885864257812, + "loss": 0.071, + "objective": 0.0720449835062027, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.6041666865348816, + "regularize": 0.0720449835062027, + "step": 615 + }, + { + "dpo_loss": 0.5217207670211792, + "epoch": 3.514407179971658, + "grad_norm": 13.823586682790467, + "learning_rate": 2.4316130421329696e-07, + "logits": -1.5293523073196411, + "logps": -89.2181625366211, + "loss": 0.071, + "objective": 0.06358211487531662, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.6333333253860474, + "regularize": 0.06358211487531662, + "step": 620 + }, + { + "dpo_loss": 0.5324966311454773, + "epoch": 3.5427491733585263, + "grad_norm": 15.423238852250835, + "learning_rate": 2.3470405967329604e-07, + "logits": -1.6097266674041748, + "logps": -87.8069076538086, + "loss": 0.0677, + "objective": 0.0661839172244072, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6625000238418579, + "regularize": 0.0661839097738266, + "step": 625 + }, + { + "dpo_loss": 0.548510730266571, + "epoch": 3.5710911667453944, + "grad_norm": 14.649308039914828, + "learning_rate": 2.2635116833033392e-07, + "logits": -1.6878260374069214, + "logps": -88.3050765991211, + "loss": 0.0735, + "objective": 0.07757600396871567, + "ranking_idealized": 0.6333333253860474, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6333333253860474, + "regularize": 0.07757599651813507, + "step": 630 + }, + { + "dpo_loss": 0.5425411462783813, + "epoch": 3.5994331601322624, + "grad_norm": 15.066685089968594, + "learning_rate": 2.181059157639598e-07, + "logits": -1.6372451782226562, + "logps": -88.1607666015625, + "loss": 0.0663, + "objective": 0.06245482712984085, + "ranking_idealized": 0.637499988079071, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6208333373069763, + "regularize": 0.06245479732751846, + "step": 635 + }, + { + "dpo_loss": 0.5177238583564758, + "epoch": 3.627775153519131, + "grad_norm": 14.538482874181112, + "learning_rate": 2.0997154521440097e-07, + "logits": -1.565827488899231, + "logps": -89.97610473632812, + "loss": 0.0639, + "objective": 0.07200702279806137, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.6166666746139526, + "regularize": 0.07200699299573898, + "step": 640 + }, + { + "dpo_loss": 0.5268372893333435, + "epoch": 3.656117146905999, + "grad_norm": 14.714394354372583, + "learning_rate": 2.0195125630684428e-07, + "logits": -1.5924190282821655, + "logps": -87.3132553100586, + "loss": 0.0666, + "objective": 0.06948961317539215, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.6416666507720947, + "regularize": 0.06948959827423096, + "step": 645 + }, + { + "dpo_loss": 0.5451396703720093, + "epoch": 3.6844591402928675, + "grad_norm": 14.392341763219553, + "learning_rate": 1.9404820379287672e-07, + "logits": -1.5905060768127441, + "logps": -88.871337890625, + "loss": 0.0603, + "objective": 0.05459137260913849, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.637499988079071, + "regularize": 0.05459136515855789, + "step": 650 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 0.6846064329147339, + "eval_logits": -1.6889305114746094, + "eval_logps": -95.68582916259766, + "eval_loss": 0.40852683782577515, + "eval_objective": 0.4125712513923645, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.4125712513923645, + "eval_runtime": 260.1365, + "eval_samples_per_second": 22.258, + "eval_steps_per_second": 0.93, + "step": 650 + }, + { + "dpo_loss": 0.5369049906730652, + "epoch": 3.7128011336797355, + "grad_norm": 14.716721190910569, + "learning_rate": 1.8626549630957395e-07, + "logits": -1.6153782606124878, + "logps": -88.33747100830078, + "loss": 0.0672, + "objective": 0.061821334064006805, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6458333134651184, + "regularize": 0.06182133033871651, + "step": 655 + }, + { + "dpo_loss": 0.5374411344528198, + "epoch": 3.7411431270666036, + "grad_norm": 14.492926045642323, + "learning_rate": 1.7860619515673032e-07, + "logits": -1.7408215999603271, + "logps": -88.87337493896484, + "loss": 0.0663, + "objective": 0.06822895258665085, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6833333373069763, + "regularize": 0.06822895258665085, + "step": 660 + }, + { + "dpo_loss": 0.5354861617088318, + "epoch": 3.769485120453472, + "grad_norm": 15.39617438199354, + "learning_rate": 1.7107331309270684e-07, + "logits": -1.6346544027328491, + "logps": -88.30725860595703, + "loss": 0.0664, + "objective": 0.06315968185663223, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.675000011920929, + "regularize": 0.06315967440605164, + "step": 665 + }, + { + "dpo_loss": 0.5363784432411194, + "epoch": 3.79782711384034, + "grad_norm": 15.749195791822254, + "learning_rate": 1.6366981314937372e-07, + "logits": -1.7264405488967896, + "logps": -88.70533752441406, + "loss": 0.0642, + "objective": 0.07602261006832123, + "ranking_idealized": 0.7333333492279053, + "ranking_idealized_expo": 0.5958333611488342, + "ranking_simple": 0.7333333492279053, + "regularize": 0.07602259516716003, + "step": 670 + }, + { + "dpo_loss": 0.5361024737358093, + "epoch": 3.826169107227208, + "grad_norm": 14.754855609421504, + "learning_rate": 1.5639860746661338e-07, + "logits": -1.674873948097229, + "logps": -87.9578857421875, + "loss": 0.0639, + "objective": 0.057048484683036804, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6583333611488342, + "regularize": 0.057048480957746506, + "step": 675 + }, + { + "dpo_loss": 0.546769917011261, + "epoch": 3.8545111006140766, + "grad_norm": 16.28642539144059, + "learning_rate": 1.492625561468393e-07, + "logits": -1.6343268156051636, + "logps": -88.65126037597656, + "loss": 0.0635, + "objective": 0.0622069351375103, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6291666626930237, + "regularize": 0.062206920236349106, + "step": 680 + }, + { + "dpo_loss": 0.5258111357688904, + "epoch": 3.8828530940009447, + "grad_norm": 15.221617480212148, + "learning_rate": 1.4226446612998671e-07, + "logits": -1.7157925367355347, + "logps": -87.88172149658203, + "loss": 0.0595, + "objective": 0.051657140254974365, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.6791666746139526, + "regularize": 0.05165712162852287, + "step": 685 + }, + { + "dpo_loss": 0.5301392674446106, + "epoch": 3.9111950873878127, + "grad_norm": 14.71114521040863, + "learning_rate": 1.3540709008941147e-07, + "logits": -1.64604651927948, + "logps": -86.96089935302734, + "loss": 0.0627, + "objective": 0.05803840234875679, + "ranking_idealized": 0.7041666507720947, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6875, + "regularize": 0.05803839489817619, + "step": 690 + }, + { + "dpo_loss": 0.5404530763626099, + "epoch": 3.9395370807746812, + "grad_norm": 14.300468193938293, + "learning_rate": 1.2869312534913685e-07, + "logits": -1.5944476127624512, + "logps": -88.3801498413086, + "loss": 0.065, + "objective": 0.06985396891832352, + "ranking_idealized": 0.6083333492279053, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.6041666865348816, + "regularize": 0.06985396891832352, + "step": 695 + }, + { + "dpo_loss": 0.5349648594856262, + "epoch": 3.9678790741615493, + "grad_norm": 14.848538983349796, + "learning_rate": 1.2212521282287093e-07, + "logits": -1.640931487083435, + "logps": -88.85566711425781, + "loss": 0.0658, + "objective": 0.06192382797598839, + "ranking_idealized": 0.612500011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6083333492279053, + "regularize": 0.06192382052540779, + "step": 700 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 0.6843095421791077, + "eval_logits": -1.6961629390716553, + "eval_logps": -95.92644500732422, + "eval_loss": 0.40860193967819214, + "eval_objective": 0.41189059615135193, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5216942429542542, + "eval_regularize": 0.41189059615135193, + "eval_runtime": 260.0344, + "eval_samples_per_second": 22.266, + "eval_steps_per_second": 0.931, + "step": 700 + }, + { + "dpo_loss": 0.5312646627426147, + "epoch": 3.9962210675484178, + "grad_norm": 14.809174954752928, + "learning_rate": 1.15705935975212e-07, + "logits": -1.5398000478744507, + "logps": -87.27682495117188, + "loss": 0.065, + "objective": 0.06780902296304703, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6625000238418579, + "regularize": 0.06780902296304703, + "step": 705 + }, + { + "dpo_loss": 0.5272520780563354, + "epoch": 4.024563060935286, + "grad_norm": 15.262991388495523, + "learning_rate": 1.094378198054533e-07, + "logits": -1.63226318359375, + "logps": -88.91695404052734, + "loss": 0.0593, + "objective": 0.0699063390493393, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.4791666567325592, + "ranking_simple": 0.6499999761581421, + "regularize": 0.0699063315987587, + "step": 710 + }, + { + "dpo_loss": 0.5363659262657166, + "epoch": 4.052905054322154, + "grad_norm": 16.13258446285675, + "learning_rate": 1.0332332985438247e-07, + "logits": -1.5873126983642578, + "logps": -88.78561401367188, + "loss": 0.0526, + "objective": 0.05241125822067261, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6666666865348816, + "regularize": 0.052411243319511414, + "step": 715 + }, + { + "dpo_loss": 0.521473228931427, + "epoch": 4.081247047709022, + "grad_norm": 15.52892462347426, + "learning_rate": 9.736487123447068e-08, + "logits": -1.499315619468689, + "logps": -91.1855697631836, + "loss": 0.0526, + "objective": 0.04763669893145561, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6458333134651184, + "regularize": 0.04763669893145561, + "step": 720 + }, + { + "dpo_loss": 0.5466646552085876, + "epoch": 4.109589041095891, + "grad_norm": 15.248617054291332, + "learning_rate": 9.156478768383058e-08, + "logits": -1.6146501302719116, + "logps": -88.77969360351562, + "loss": 0.0565, + "objective": 0.05335882678627968, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6416666507720947, + "regularize": 0.05335881933569908, + "step": 725 + }, + { + "dpo_loss": 0.5291512608528137, + "epoch": 4.137931034482759, + "grad_norm": 15.105310283399879, + "learning_rate": 8.592536064431466e-08, + "logits": -1.687792181968689, + "logps": -88.5714340209961, + "loss": 0.0564, + "objective": 0.05620870739221573, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6583333611488342, + "regularize": 0.05620870366692543, + "step": 730 + }, + { + "dpo_loss": 0.5384243130683899, + "epoch": 4.166273027869627, + "grad_norm": 15.990640609108542, + "learning_rate": 8.044880836411888e-08, + "logits": -1.5998725891113281, + "logps": -89.7979736328125, + "loss": 0.0511, + "objective": 0.047256775200366974, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6791666746139526, + "regularize": 0.04725676402449608, + "step": 735 + }, + { + "dpo_loss": 0.5150901079177856, + "epoch": 4.194615021256495, + "grad_norm": 14.62250055664198, + "learning_rate": 7.513728502524286e-08, + "logits": -1.5944503545761108, + "logps": -89.62586212158203, + "loss": 0.0534, + "objective": 0.05004221946001053, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6708333492279053, + "regularize": 0.05004219710826874, + "step": 740 + }, + { + "dpo_loss": 0.5289852023124695, + "epoch": 4.222957014643363, + "grad_norm": 15.558949956501518, + "learning_rate": 6.999287989614971e-08, + "logits": -1.6679482460021973, + "logps": -86.46797180175781, + "loss": 0.0517, + "objective": 0.05193919688463211, + "ranking_idealized": 0.6291666626930237, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6291666626930237, + "regularize": 0.051939185708761215, + "step": 745 + }, + { + "dpo_loss": 0.5270615220069885, + "epoch": 4.251299008030231, + "grad_norm": 17.47564517685443, + "learning_rate": 6.501761650996052e-08, + "logits": -1.787042498588562, + "logps": -88.59746551513672, + "loss": 0.0521, + "objective": 0.061865612864494324, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6416666507720947, + "regularize": 0.06186559051275253, + "step": 750 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 0.6843414902687073, + "eval_logits": -1.6900094747543335, + "eval_logps": -95.91879272460938, + "eval_loss": 0.4082893431186676, + "eval_objective": 0.41186439990997314, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5227272510528564, + "eval_regularize": 0.41186439990997314, + "eval_runtime": 260.1497, + "eval_samples_per_second": 22.256, + "eval_steps_per_second": 0.93, + "step": 750 + }, + { + "dpo_loss": 0.529739260673523, + "epoch": 4.2796410014171, + "grad_norm": 14.371086729219567, + "learning_rate": 6.021345186850418e-08, + "logits": -1.6882461309432983, + "logps": -88.36084747314453, + "loss": 0.0539, + "objective": 0.05107791721820831, + "ranking_idealized": 0.6791666746139526, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6541666388511658, + "regularize": 0.05107790231704712, + "step": 755 + }, + { + "dpo_loss": 0.5281350612640381, + "epoch": 4.307982994803968, + "grad_norm": 16.117421305971423, + "learning_rate": 5.5582275672538316e-08, + "logits": -1.6556658744812012, + "logps": -89.51905822753906, + "loss": 0.0458, + "objective": 0.04781101644039154, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.6875, + "regularize": 0.04781101644039154, + "step": 760 + }, + { + "dpo_loss": 0.51561039686203, + "epoch": 4.336324988190836, + "grad_norm": 14.994145013136821, + "learning_rate": 5.112590957844232e-08, + "logits": -1.697762370109558, + "logps": -89.37593078613281, + "loss": 0.0528, + "objective": 0.054953668266534805, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.7250000238418579, + "regularize": 0.05495365336537361, + "step": 765 + }, + { + "dpo_loss": 0.5164503455162048, + "epoch": 4.364666981577704, + "grad_norm": 13.892500924387873, + "learning_rate": 4.684610648167503e-08, + "logits": -1.5592148303985596, + "logps": -87.5302734375, + "loss": 0.0516, + "objective": 0.04956016317009926, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6541666388511658, + "regularize": 0.049560148268938065, + "step": 770 + }, + { + "dpo_loss": 0.5293823480606079, + "epoch": 4.393008974964572, + "grad_norm": 14.429815473976188, + "learning_rate": 4.274454982728032e-08, + "logits": -1.614122986793518, + "logps": -88.37311553955078, + "loss": 0.05, + "objective": 0.05319143459200859, + "ranking_idealized": 0.6666666865348816, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6666666865348816, + "regularize": 0.053191423416137695, + "step": 775 + }, + { + "dpo_loss": 0.5353278517723083, + "epoch": 4.42135096835144, + "grad_norm": 14.827503239956233, + "learning_rate": 3.882285294770937e-08, + "logits": -1.673889398574829, + "logps": -86.99459075927734, + "loss": 0.0486, + "objective": 0.050799839198589325, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.6333333253860474, + "regularize": 0.05079982802271843, + "step": 780 + }, + { + "dpo_loss": 0.5483137369155884, + "epoch": 4.449692961738309, + "grad_norm": 13.762782844472444, + "learning_rate": 3.508255842822255e-08, + "logits": -1.6981257200241089, + "logps": -87.26993560791016, + "loss": 0.0577, + "objective": 0.060258664190769196, + "ranking_idealized": 0.6583333611488342, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6333333253860474, + "regularize": 0.0602586455643177, + "step": 785 + }, + { + "dpo_loss": 0.5247166156768799, + "epoch": 4.478034955125177, + "grad_norm": 14.343748620962772, + "learning_rate": 3.15251375001192e-08, + "logits": -1.6197153329849243, + "logps": -89.92550659179688, + "loss": 0.0519, + "objective": 0.06482961773872375, + "ranking_idealized": 0.7291666865348816, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.7166666388511658, + "regularize": 0.06482960283756256, + "step": 790 + }, + { + "dpo_loss": 0.5297049880027771, + "epoch": 4.506376948512045, + "grad_norm": 14.241023462964922, + "learning_rate": 2.8151989462033787e-08, + "logits": -1.5452524423599243, + "logps": -89.73005676269531, + "loss": 0.0535, + "objective": 0.0436834916472435, + "ranking_idealized": 0.6541666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.6499999761581421, + "regularize": 0.043683480471372604, + "step": 795 + }, + { + "dpo_loss": 0.5308266878128052, + "epoch": 4.534718941898913, + "grad_norm": 14.965902621487606, + "learning_rate": 2.4964441129527335e-08, + "logits": -1.5640093088150024, + "logps": -88.82685089111328, + "loss": 0.0529, + "objective": 0.056653544306755066, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.6666666865348816, + "regularize": 0.05665354058146477, + "step": 800 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 0.684349000453949, + "eval_logits": -1.6918096542358398, + "eval_logps": -95.80995178222656, + "eval_loss": 0.4081306457519531, + "eval_objective": 0.41187557578086853, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.5206611752510071, + "eval_regularize": 0.41187557578086853, + "eval_runtime": 260.1251, + "eval_samples_per_second": 22.259, + "eval_steps_per_second": 0.93, + "step": 800 + }, + { + "dpo_loss": 0.5310518145561218, + "epoch": 4.563060935285781, + "grad_norm": 14.644913526257378, + "learning_rate": 2.1963746313188757e-08, + "logits": -1.612503170967102, + "logps": -87.55821990966797, + "loss": 0.0533, + "objective": 0.05946908891201019, + "ranking_idealized": 0.6458333134651184, + "ranking_idealized_expo": 0.46666666865348816, + "ranking_simple": 0.637499988079071, + "regularize": 0.0594690665602684, + "step": 805 + }, + { + "dpo_loss": 0.5326514840126038, + "epoch": 4.59140292867265, + "grad_norm": 14.784289994959185, + "learning_rate": 1.915108532545351e-08, + "logits": -1.7028088569641113, + "logps": -87.26454162597656, + "loss": 0.0486, + "objective": 0.0378708690404892, + "ranking_idealized": 0.5874999761581421, + "ranking_idealized_expo": 0.4625000059604645, + "ranking_simple": 0.5874999761581421, + "regularize": 0.0378708578646183, + "step": 810 + }, + { + "dpo_loss": 0.5349629521369934, + "epoch": 4.619744922059518, + "grad_norm": 13.924074189602024, + "learning_rate": 1.6527564516331638e-08, + "logits": -1.5550504922866821, + "logps": -88.41017150878906, + "loss": 0.0474, + "objective": 0.03614622354507446, + "ranking_idealized": 0.7250000238418579, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.7208333611488342, + "regularize": 0.036146197468042374, + "step": 815 + }, + { + "dpo_loss": 0.5395488142967224, + "epoch": 4.648086915446386, + "grad_norm": 14.728879231266445, + "learning_rate": 1.4094215838229172e-08, + "logits": -1.6426359415054321, + "logps": -88.95872497558594, + "loss": 0.0497, + "objective": 0.04463572055101395, + "ranking_idealized": 0.6875, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6916666626930237, + "regularize": 0.044635698199272156, + "step": 820 + }, + { + "dpo_loss": 0.5434182286262512, + "epoch": 4.6764289088332545, + "grad_norm": 14.648978111763995, + "learning_rate": 1.1851996440033318e-08, + "logits": -1.5325695276260376, + "logps": -86.70441436767578, + "loss": 0.048, + "objective": 0.04530922695994377, + "ranking_idealized": 0.6166666746139526, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.612500011920929, + "regularize": 0.04530921205878258, + "step": 825 + }, + { + "dpo_loss": 0.5301134586334229, + "epoch": 4.7047709022201225, + "grad_norm": 14.659203003472166, + "learning_rate": 9.801788290621505e-09, + "logits": -1.7177317142486572, + "logps": -88.56446838378906, + "loss": 0.0537, + "objective": 0.0582742765545845, + "ranking_idealized": 0.675000011920929, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.675000011920929, + "regularize": 0.05827426165342331, + "step": 830 + }, + { + "dpo_loss": 0.5137851238250732, + "epoch": 4.733112895606991, + "grad_norm": 14.143035439452712, + "learning_rate": 7.944397831941951e-09, + "logits": -1.6337097883224487, + "logps": -88.74520874023438, + "loss": 0.0469, + "objective": 0.046563997864723206, + "ranking_idealized": 0.6499999761581421, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.6416666507720947, + "regularize": 0.04656398296356201, + "step": 835 + }, + { + "dpo_loss": 0.5074983835220337, + "epoch": 4.7614548889938595, + "grad_norm": 14.776976768313608, + "learning_rate": 6.280555661802856e-09, + "logits": -1.637603759765625, + "logps": -89.17638397216797, + "loss": 0.049, + "objective": 0.04939265549182892, + "ranking_idealized": 0.6958333253860474, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.675000011920929, + "regularize": 0.049392636865377426, + "step": 840 + }, + { + "dpo_loss": 0.5405561327934265, + "epoch": 4.7897968823807275, + "grad_norm": 14.924755507827577, + "learning_rate": 4.810916246494157e-09, + "logits": -1.6720634698867798, + "logps": -88.51549530029297, + "loss": 0.0508, + "objective": 0.048842187970876694, + "ranking_idealized": 0.6416666507720947, + "ranking_idealized_expo": 0.4583333432674408, + "ranking_simple": 0.6291666626930237, + "regularize": 0.048842158168554306, + "step": 845 + }, + { + "dpo_loss": 0.5286487340927124, + "epoch": 4.818138875767596, + "grad_norm": 14.741530280772613, + "learning_rate": 3.5360576633558513e-09, + "logits": -1.604957938194275, + "logps": -87.53111267089844, + "loss": 0.0471, + "objective": 0.049042269587516785, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6708333492279053, + "regularize": 0.04904225841164589, + "step": 850 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 0.6843511462211609, + "eval_logits": -1.6919739246368408, + "eval_logps": -95.9782485961914, + "eval_loss": 0.40833279490470886, + "eval_objective": 0.4120672345161438, + "eval_ranking_idealized": 0.6570248007774353, + "eval_ranking_idealized_expo": 0.5113636255264282, + "eval_ranking_simple": 0.51962810754776, + "eval_regularize": 0.4120672345161438, + "eval_runtime": 259.9887, + "eval_samples_per_second": 22.27, + "eval_steps_per_second": 0.931, + "step": 850 + }, + { + "dpo_loss": 0.5401243567466736, + "epoch": 4.846480869154464, + "grad_norm": 14.09171530539623, + "learning_rate": 2.4564813733932155e-09, + "logits": -1.6012241840362549, + "logps": -88.17009735107422, + "loss": 0.047, + "objective": 0.04625382646918297, + "ranking_idealized": 0.7083333134651184, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.6916666626930237, + "regularize": 0.04625379666686058, + "step": 855 + }, + { + "dpo_loss": 0.5225604772567749, + "epoch": 4.874822862541333, + "grad_norm": 14.951871396423634, + "learning_rate": 1.5726120240288631e-09, + "logits": -1.5658340454101562, + "logps": -87.6793212890625, + "loss": 0.0434, + "objective": 0.04740705341100693, + "ranking_idealized": 0.7166666388511658, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.7041666507720947, + "regularize": 0.047407038509845734, + "step": 860 + }, + { + "dpo_loss": 0.5320844054222107, + "epoch": 4.903164855928201, + "grad_norm": 14.202693717147874, + "learning_rate": 8.847972820693051e-10, + "logits": -1.6407142877578735, + "logps": -86.8409652709961, + "loss": 0.0464, + "objective": 0.06202062591910362, + "ranking_idealized": 0.6625000238418579, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.637499988079071, + "regularize": 0.06202061474323273, + "step": 865 + }, + { + "dpo_loss": 0.5251496434211731, + "epoch": 4.931506849315069, + "grad_norm": 14.584157991733704, + "learning_rate": 3.933076969516724e-10, + "logits": -1.6990463733673096, + "logps": -88.78810119628906, + "loss": 0.0513, + "objective": 0.0480065755546093, + "ranking_idealized": 0.6916666626930237, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.675000011920929, + "regularize": 0.048006556928157806, + "step": 870 + }, + { + "dpo_loss": 0.5269615650177002, + "epoch": 4.959848842701937, + "grad_norm": 14.31816614460119, + "learning_rate": 9.833659432367803e-11, + "logits": -1.6176135540008545, + "logps": -88.72756958007812, + "loss": 0.046, + "objective": 0.035082168877124786, + "ranking_idealized": 0.6833333373069763, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6791666746139526, + "regularize": 0.03508211672306061, + "step": 875 + }, + { + "dpo_loss": 0.5263823866844177, + "epoch": 4.988190836088805, + "grad_norm": 14.834100494656957, + "learning_rate": 0.0, + "logits": -1.6788469552993774, + "logps": -89.02001953125, + "loss": 0.0442, + "objective": 0.03841705247759819, + "ranking_idealized": 0.6708333492279053, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.6708333492279053, + "regularize": 0.03841704502701759, + "step": 880 + }, + { + "epoch": 4.988190836088805, + "step": 880, + "total_flos": 0.0, + "train_loss": 0.1521414952352643, + "train_runtime": 35252.4203, + "train_samples_per_second": 7.205, + "train_steps_per_second": 0.025 + } + ], + "logging_steps": 5, + "max_steps": 880, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}