{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.988190836088805, "eval_steps": 50, "global_step": 880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.005668398677373642, "grad_norm": 1.3413641460666135, "learning_rate": 5.681818181818182e-08, "logits": -1.3147305250167847, "logps": -88.0877456665039, "loss": 0.4113, "objective": 0.41588976979255676, "ranking_idealized": 0.9791666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.41588976979255676, "step": 1 }, { "dpo_loss": 0.6931489109992981, "epoch": 0.02834199338686821, "grad_norm": 1.344017584578235, "learning_rate": 2.840909090909091e-07, "logits": -1.3680044412612915, "logps": -84.2520523071289, "loss": 0.4131, "objective": 0.3755461275577545, "ranking_idealized": 0.921875, "ranking_idealized_expo": 0.5729166865348816, "ranking_simple": 0.546875, "regularize": 0.3755461275577545, "step": 5 }, { "dpo_loss": 0.6928147077560425, "epoch": 0.05668398677373642, "grad_norm": 1.297307695578835, "learning_rate": 5.681818181818182e-07, "logits": -1.447161078453064, "logps": -82.31820678710938, "loss": 0.4176, "objective": 0.4424538016319275, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5166666507720947, "regularize": 0.4424538016319275, "step": 10 }, { "dpo_loss": 0.6928682923316956, "epoch": 0.08502598016060463, "grad_norm": 1.2486707608263468, "learning_rate": 8.522727272727273e-07, "logits": -1.4277892112731934, "logps": -81.3590316772461, "loss": 0.4254, "objective": 0.41196563839912415, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.512499988079071, "regularize": 0.41196563839912415, "step": 15 }, { "dpo_loss": 0.6925787329673767, "epoch": 0.11336797354747284, "grad_norm": 1.4692119961571695, "learning_rate": 1.1363636363636364e-06, "logits": -1.4481867551803589, "logps": -81.8401870727539, "loss": 0.4151, "objective": 0.4033361077308655, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5291666388511658, "regularize": 0.4033361077308655, "step": 20 }, { "dpo_loss": 0.6910920739173889, "epoch": 0.14170996693434104, "grad_norm": 1.6122424998527856, "learning_rate": 1.4204545454545458e-06, "logits": -1.5582950115203857, "logps": -82.6436538696289, "loss": 0.4117, "objective": 0.43133974075317383, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5166666507720947, "regularize": 0.43133974075317383, "step": 25 }, { "dpo_loss": 0.6899585723876953, "epoch": 0.17005196032120926, "grad_norm": 2.145868200166189, "learning_rate": 1.7045454545454546e-06, "logits": -1.6009422540664673, "logps": -86.27643585205078, "loss": 0.4119, "objective": 0.4145013391971588, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5666666626930237, "regularize": 0.4145013391971588, "step": 30 }, { "dpo_loss": 0.6881429553031921, "epoch": 0.19839395370807747, "grad_norm": 3.250036749772235, "learning_rate": 1.9886363636363638e-06, "logits": -1.6226321458816528, "logps": -95.08840942382812, "loss": 0.408, "objective": 0.3913627564907074, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.550000011920929, "regularize": 0.3913627564907074, "step": 35 }, { "dpo_loss": 0.681670606136322, "epoch": 0.22673594709494568, "grad_norm": 2.7778046190059134, "learning_rate": 2.2727272727272728e-06, "logits": -1.6656767129898071, "logps": -101.75907135009766, "loss": 0.4132, "objective": 0.42999422550201416, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6041666865348816, "regularize": 0.42999422550201416, "step": 40 }, { "dpo_loss": 0.6795368790626526, "epoch": 0.25507794048181387, "grad_norm": 3.073343526840778, "learning_rate": 2.556818181818182e-06, "logits": -1.7650772333145142, "logps": -107.9706039428711, "loss": 0.4172, "objective": 0.46339866518974304, "ranking_idealized": 0.9666666388511658, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6083333492279053, "regularize": 0.46339866518974304, "step": 45 }, { "dpo_loss": 0.6784433126449585, "epoch": 0.2834199338686821, "grad_norm": 4.230883583179675, "learning_rate": 2.8409090909090916e-06, "logits": -1.6497570276260376, "logps": -111.7117919921875, "loss": 0.4052, "objective": 0.3878687024116516, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.5666666626930237, "regularize": 0.3878687024116516, "step": 50 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.691393256187439, "eval_logits": -1.8292194604873657, "eval_logps": -129.08827209472656, "eval_loss": 0.4106997549533844, "eval_objective": 0.41201457381248474, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5371900796890259, "eval_regularize": 0.41201457381248474, "eval_runtime": 265.4611, "eval_samples_per_second": 21.811, "eval_steps_per_second": 0.912, "step": 50 }, { "dpo_loss": 0.6729306578636169, "epoch": 0.3117619272555503, "grad_norm": 5.170133309231958, "learning_rate": 3.125e-06, "logits": -1.7047711610794067, "logps": -128.38836669921875, "loss": 0.3945, "objective": 0.39437106251716614, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6208333373069763, "regularize": 0.39437106251716614, "step": 55 }, { "dpo_loss": 0.673882246017456, "epoch": 0.3401039206424185, "grad_norm": 4.832516943098698, "learning_rate": 3.409090909090909e-06, "logits": -1.6675713062286377, "logps": -128.96734619140625, "loss": 0.3855, "objective": 0.3761754035949707, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6041666865348816, "regularize": 0.3761754035949707, "step": 60 }, { "dpo_loss": 0.669740617275238, "epoch": 0.3684459140292867, "grad_norm": 5.367256043496177, "learning_rate": 3.6931818181818186e-06, "logits": -1.5951703786849976, "logps": -137.0664520263672, "loss": 0.3701, "objective": 0.36182090640068054, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6000000238418579, "regularize": 0.36182090640068054, "step": 65 }, { "dpo_loss": 0.6654062271118164, "epoch": 0.39678790741615494, "grad_norm": 4.94424534111878, "learning_rate": 3.9772727272727275e-06, "logits": -1.697352647781372, "logps": -133.348388671875, "loss": 0.3724, "objective": 0.40955594182014465, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.637499988079071, "regularize": 0.40955594182014465, "step": 70 }, { "dpo_loss": 0.6624744534492493, "epoch": 0.42512990080302315, "grad_norm": 5.966524918481801, "learning_rate": 4.2613636363636365e-06, "logits": -1.8367187976837158, "logps": -136.5087432861328, "loss": 0.3757, "objective": 0.38774457573890686, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.625, "regularize": 0.38774457573890686, "step": 75 }, { "dpo_loss": 0.6609200835227966, "epoch": 0.45347189418989137, "grad_norm": 6.280774873594145, "learning_rate": 4.5454545454545455e-06, "logits": -1.9799270629882812, "logps": -158.3598175048828, "loss": 0.356, "objective": 0.364311546087265, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6625000238418579, "regularize": 0.364311546087265, "step": 80 }, { "dpo_loss": 0.6481165289878845, "epoch": 0.4818138875767596, "grad_norm": 6.596386501175196, "learning_rate": 4.829545454545455e-06, "logits": -2.1429412364959717, "logps": -150.511474609375, "loss": 0.3409, "objective": 0.34856364130973816, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6625000238418579, "regularize": 0.34856364130973816, "step": 85 }, { "dpo_loss": 0.6467424035072327, "epoch": 0.5101558809636277, "grad_norm": 7.381865971758164, "learning_rate": 4.999921328558333e-06, "logits": -2.0832204818725586, "logps": -174.22291564941406, "loss": 0.3472, "objective": 0.3462918698787689, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6333333253860474, "regularize": 0.3462918698787689, "step": 90 }, { "dpo_loss": 0.6253587007522583, "epoch": 0.538497874350496, "grad_norm": 8.119206476955762, "learning_rate": 4.999036331701828e-06, "logits": -2.2299115657806396, "logps": -187.98475646972656, "loss": 0.3267, "objective": 0.33174222707748413, "ranking_idealized": 0.9624999761581421, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.7291666865348816, "regularize": 0.33174222707748413, "step": 95 }, { "dpo_loss": 0.6211538314819336, "epoch": 0.5668398677373642, "grad_norm": 6.956710000085001, "learning_rate": 4.997168347957521e-06, "logits": -2.3657114505767822, "logps": -187.91744995117188, "loss": 0.3407, "objective": 0.3683268129825592, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.7083333134651184, "regularize": 0.3683268129825592, "step": 100 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6838738322257996, "eval_logits": -2.5065720081329346, "eval_logps": -173.33192443847656, "eval_loss": 0.40174734592437744, "eval_objective": 0.40628084540367126, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5547520518302917, "eval_regularize": 0.40628084540367126, "eval_runtime": 259.1906, "eval_samples_per_second": 22.339, "eval_steps_per_second": 0.934, "step": 100 }, { "dpo_loss": 0.6136354804039001, "epoch": 0.5951818611242324, "grad_norm": 9.080339976174114, "learning_rate": 4.994318112090048e-06, "logits": -2.1985232830047607, "logps": -186.09088134765625, "loss": 0.3217, "objective": 0.3403078019618988, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.737500011920929, "regularize": 0.3403078019618988, "step": 105 }, { "dpo_loss": 0.6185809969902039, "epoch": 0.6235238545111006, "grad_norm": 7.791051933395558, "learning_rate": 4.990486745229364e-06, "logits": -2.484309434890747, "logps": -186.34634399414062, "loss": 0.3255, "objective": 0.3505449593067169, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.699999988079071, "regularize": 0.3505449593067169, "step": 110 }, { "dpo_loss": 0.6309658288955688, "epoch": 0.6518658478979689, "grad_norm": 6.754375219280332, "learning_rate": 4.985675754429744e-06, "logits": -2.47392201423645, "logps": -166.2880859375, "loss": 0.3035, "objective": 0.2969822585582733, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6916666626930237, "regularize": 0.2969822585582733, "step": 115 }, { "dpo_loss": 0.6232146620750427, "epoch": 0.680207841284837, "grad_norm": 8.649064764793055, "learning_rate": 4.9798870320769884e-06, "logits": -2.4262490272521973, "logps": -179.76458740234375, "loss": 0.2997, "objective": 0.27002623677253723, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6916666626930237, "regularize": 0.27002623677253723, "step": 120 }, { "dpo_loss": 0.6118648648262024, "epoch": 0.7085498346717053, "grad_norm": 8.68434746516712, "learning_rate": 4.973122855144066e-06, "logits": -2.3900842666625977, "logps": -196.9748992919922, "loss": 0.2995, "objective": 0.2906176447868347, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.737500011920929, "regularize": 0.2906176447868347, "step": 125 }, { "dpo_loss": 0.6124536991119385, "epoch": 0.7368918280585735, "grad_norm": 7.486248737537153, "learning_rate": 4.965385884295467e-06, "logits": -2.4602267742156982, "logps": -182.41766357421875, "loss": 0.2873, "objective": 0.26530107855796814, "ranking_idealized": 0.9166666865348816, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.7250000238418579, "regularize": 0.26530107855796814, "step": 130 }, { "dpo_loss": 0.606670081615448, "epoch": 0.7652338214454416, "grad_norm": 7.285272647997681, "learning_rate": 4.956679162840646e-06, "logits": -2.281942844390869, "logps": -177.3143768310547, "loss": 0.269, "objective": 0.270210325717926, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.7208333611488342, "regularize": 0.270210325717926, "step": 135 }, { "dpo_loss": 0.6003846526145935, "epoch": 0.7935758148323099, "grad_norm": 7.413948788955954, "learning_rate": 4.947006115536947e-06, "logits": -2.1732773780822754, "logps": -176.40997314453125, "loss": 0.2664, "objective": 0.24727170169353485, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.7666666507720947, "regularize": 0.24727170169353485, "step": 140 }, { "dpo_loss": 0.6026275157928467, "epoch": 0.821917808219178, "grad_norm": 7.450961549840002, "learning_rate": 4.9363705472424825e-06, "logits": -2.2946832180023193, "logps": -178.30978393554688, "loss": 0.2707, "objective": 0.2942873537540436, "ranking_idealized": 0.9125000238418579, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.7250000238418579, "regularize": 0.2942873537540436, "step": 145 }, { "dpo_loss": 0.6038042306900024, "epoch": 0.8502598016060463, "grad_norm": 8.337666421628235, "learning_rate": 4.924776641419513e-06, "logits": -2.2924880981445312, "logps": -175.90670776367188, "loss": 0.2596, "objective": 0.2636435329914093, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.7541666626930237, "regularize": 0.2636435329914093, "step": 150 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6806454658508301, "eval_logits": -2.4464104175567627, "eval_logps": -188.63946533203125, "eval_loss": 0.4017498791217804, "eval_objective": 0.40516260266304016, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5423553586006165, "eval_regularize": 0.40516260266304016, "eval_runtime": 259.5657, "eval_samples_per_second": 22.306, "eval_steps_per_second": 0.932, "step": 150 }, { "dpo_loss": 0.5875340700149536, "epoch": 0.8786017949929145, "grad_norm": 8.964143655715564, "learning_rate": 4.9122289584888926e-06, "logits": -2.3187806606292725, "logps": -185.90478515625, "loss": 0.2651, "objective": 0.26818570494651794, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.800000011920929, "regularize": 0.26818570494651794, "step": 155 }, { "dpo_loss": 0.5894069671630859, "epoch": 0.9069437883797827, "grad_norm": 7.833211918555924, "learning_rate": 4.8987324340362445e-06, "logits": -2.2485156059265137, "logps": -194.0889129638672, "loss": 0.2472, "objective": 0.24632495641708374, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.7541666626930237, "regularize": 0.24632495641708374, "step": 160 }, { "dpo_loss": 0.5998678803443909, "epoch": 0.9352857817666509, "grad_norm": 6.957604774053052, "learning_rate": 4.884292376870567e-06, "logits": -2.367635488510132, "logps": -169.78195190429688, "loss": 0.2564, "objective": 0.26594653725624084, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.7208333611488342, "regularize": 0.26594653725624084, "step": 165 }, { "dpo_loss": 0.5835825204849243, "epoch": 0.9636277751535192, "grad_norm": 8.930600570817928, "learning_rate": 4.868914466936038e-06, "logits": -2.363553047180176, "logps": -192.9253387451172, "loss": 0.2434, "objective": 0.22550734877586365, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.7708333134651184, "regularize": 0.22550733387470245, "step": 170 }, { "dpo_loss": 0.5910046100616455, "epoch": 0.9919697685403873, "grad_norm": 7.732085709182539, "learning_rate": 4.8526047530778175e-06, "logits": -2.2910239696502686, "logps": -191.64141845703125, "loss": 0.2496, "objective": 0.24141448736190796, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.7875000238418579, "regularize": 0.24141448736190796, "step": 175 }, { "dpo_loss": 0.5720356106758118, "epoch": 1.0203117619272555, "grad_norm": 7.878585873914542, "learning_rate": 4.835369650662767e-06, "logits": -2.5219788551330566, "logps": -186.59535217285156, "loss": 0.2212, "objective": 0.21063460409641266, "ranking_idealized": 0.9125000238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.8125, "regularize": 0.21063460409641266, "step": 180 }, { "dpo_loss": 0.5561386942863464, "epoch": 1.0486537553141237, "grad_norm": 8.429656140437402, "learning_rate": 4.817215939055984e-06, "logits": -2.44401478767395, "logps": -204.15762329101562, "loss": 0.2153, "objective": 0.21741175651550293, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.8458333611488342, "regularize": 0.21741175651550293, "step": 185 }, { "dpo_loss": 0.5703259706497192, "epoch": 1.076995748700992, "grad_norm": 6.743069420703677, "learning_rate": 4.798150758954164e-06, "logits": -2.489015817642212, "logps": -198.45516967773438, "loss": 0.2003, "objective": 0.17540977895259857, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.8333333134651184, "regularize": 0.17540977895259857, "step": 190 }, { "dpo_loss": 0.5622718930244446, "epoch": 1.10533774208786, "grad_norm": 7.079228513521207, "learning_rate": 4.778181609576832e-06, "logits": -2.429560661315918, "logps": -182.86814880371094, "loss": 0.1994, "objective": 0.16888141632080078, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.875, "regularize": 0.16888141632080078, "step": 195 }, { "dpo_loss": 0.5696191787719727, "epoch": 1.1336797354747283, "grad_norm": 6.830259665006017, "learning_rate": 4.757316345716554e-06, "logits": -2.513395071029663, "logps": -192.72938537597656, "loss": 0.1965, "objective": 0.20290271937847137, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.8416666388511658, "regularize": 0.20290271937847137, "step": 200 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6801125407218933, "eval_logits": -2.597655773162842, "eval_logps": -193.1246795654297, "eval_loss": 0.40018174052238464, "eval_objective": 0.40411826968193054, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.55888432264328, "eval_regularize": 0.40411826968193054, "eval_runtime": 259.3248, "eval_samples_per_second": 22.327, "eval_steps_per_second": 0.933, "step": 200 }, { "dpo_loss": 0.5719407200813293, "epoch": 1.1620217288615966, "grad_norm": 6.896008583963979, "learning_rate": 4.735563174649278e-06, "logits": -2.5157065391540527, "logps": -198.75962829589844, "loss": 0.205, "objective": 0.2138771265745163, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8166666626930237, "regularize": 0.2138771265745163, "step": 205 }, { "dpo_loss": 0.5655397176742554, "epoch": 1.1903637222484649, "grad_norm": 6.830065558874749, "learning_rate": 4.7129306529060415e-06, "logits": -2.547936201095581, "logps": -187.2952117919922, "loss": 0.2025, "objective": 0.22636540234088898, "ranking_idealized": 0.9041666388511658, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8166666626930237, "regularize": 0.2263653725385666, "step": 210 }, { "dpo_loss": 0.5676775574684143, "epoch": 1.2187057156353331, "grad_norm": 6.8520964757916945, "learning_rate": 4.68942768290728e-06, "logits": -2.54328989982605, "logps": -187.34585571289062, "loss": 0.1863, "objective": 0.19478672742843628, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.8416666388511658, "regularize": 0.19478671252727509, "step": 215 }, { "dpo_loss": 0.5695532560348511, "epoch": 1.2470477090222012, "grad_norm": 6.476230536915824, "learning_rate": 4.665063509461098e-06, "logits": -2.455770969390869, "logps": -187.62728881835938, "loss": 0.1957, "objective": 0.1894843727350235, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.6083333492279053, "ranking_simple": 0.8583333492279053, "regularize": 0.1894843727350235, "step": 220 }, { "dpo_loss": 0.5596610903739929, "epoch": 1.2753897024090695, "grad_norm": 6.702837995316673, "learning_rate": 4.639847716126855e-06, "logits": -2.4951536655426025, "logps": -189.64401245117188, "loss": 0.1959, "objective": 0.1725076138973236, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.8583333492279053, "regularize": 0.1725076138973236, "step": 225 }, { "dpo_loss": 0.5657731890678406, "epoch": 1.3037316957959377, "grad_norm": 7.276570061796103, "learning_rate": 4.613790221445511e-06, "logits": -2.5368714332580566, "logps": -193.73602294921875, "loss": 0.1875, "objective": 0.18021216988563538, "ranking_idealized": 0.9125000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.8041666746139526, "regularize": 0.18021215498447418, "step": 230 }, { "dpo_loss": 0.5439472794532776, "epoch": 1.3320736891828058, "grad_norm": 7.643293399256914, "learning_rate": 4.586901275038201e-06, "logits": -2.8654701709747314, "logps": -191.69354248046875, "loss": 0.1942, "objective": 0.2024109810590744, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.8416666388511658, "regularize": 0.2024109810590744, "step": 235 }, { "dpo_loss": 0.5640192031860352, "epoch": 1.360415682569674, "grad_norm": 7.002186203050705, "learning_rate": 4.559191453574582e-06, "logits": -2.7892987728118896, "logps": -191.39663696289062, "loss": 0.1853, "objective": 0.19372233748435974, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.7958333492279053, "regularize": 0.19372233748435974, "step": 240 }, { "dpo_loss": 0.5699235796928406, "epoch": 1.3887576759565423, "grad_norm": 7.135454177759647, "learning_rate": 4.530671656612544e-06, "logits": -2.747896909713745, "logps": -188.15423583984375, "loss": 0.172, "objective": 0.18116973340511322, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.8291666507720947, "regularize": 0.18116970360279083, "step": 245 }, { "dpo_loss": 0.5546202063560486, "epoch": 1.4170996693434104, "grad_norm": 6.856611462056187, "learning_rate": 4.501353102310901e-06, "logits": -2.626624822616577, "logps": -189.72596740722656, "loss": 0.1784, "objective": 0.19344764947891235, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.8541666865348816, "regularize": 0.19344764947891235, "step": 250 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6801539659500122, "eval_logits": -2.7528111934661865, "eval_logps": -189.47007751464844, "eval_loss": 0.39902833104133606, "eval_objective": 0.40230515599250793, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5619834661483765, "eval_regularize": 0.40230515599250793, "eval_runtime": 258.6625, "eval_samples_per_second": 22.384, "eval_steps_per_second": 0.936, "step": 250 }, { "dpo_loss": 0.5707473754882812, "epoch": 1.4454416627302786, "grad_norm": 6.789096244940944, "learning_rate": 4.4712473230167775e-06, "logits": -2.524132490158081, "logps": -189.31150817871094, "loss": 0.1841, "objective": 0.17286911606788635, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.8458333611488342, "regularize": 0.17286911606788635, "step": 255 }, { "dpo_loss": 0.5669309496879578, "epoch": 1.473783656117147, "grad_norm": 7.262023839822884, "learning_rate": 4.440366160729393e-06, "logits": -2.642547130584717, "logps": -196.33497619628906, "loss": 0.1778, "objective": 0.19296441972255707, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.8541666865348816, "regularize": 0.19296441972255707, "step": 260 }, { "dpo_loss": 0.546380877494812, "epoch": 1.5021256495040152, "grad_norm": 6.836875807788374, "learning_rate": 4.4087217624420595e-06, "logits": -2.617671251296997, "logps": -200.48138427734375, "loss": 0.18, "objective": 0.1762746423482895, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.8541666865348816, "regularize": 0.1762746423482895, "step": 265 }, { "dpo_loss": 0.5641717910766602, "epoch": 1.5304676428908834, "grad_norm": 6.457668960267231, "learning_rate": 4.376326575364206e-06, "logits": -2.5867457389831543, "logps": -194.27902221679688, "loss": 0.1782, "objective": 0.19895337522029877, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.8500000238418579, "regularize": 0.19895337522029877, "step": 270 }, { "dpo_loss": 0.5631863474845886, "epoch": 1.5588096362777515, "grad_norm": 6.814477250627082, "learning_rate": 4.34319334202531e-06, "logits": -2.5872161388397217, "logps": -194.5428924560547, "loss": 0.1719, "objective": 0.16667112708091736, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.824999988079071, "regularize": 0.16667112708091736, "step": 275 }, { "dpo_loss": 0.5430881977081299, "epoch": 1.5871516296646198, "grad_norm": 7.393370645908027, "learning_rate": 4.309335095262675e-06, "logits": -2.4844515323638916, "logps": -203.0095977783203, "loss": 0.1821, "objective": 0.18616026639938354, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.862500011920929, "regularize": 0.18616026639938354, "step": 280 }, { "dpo_loss": 0.5480186939239502, "epoch": 1.615493623051488, "grad_norm": 6.140317838250891, "learning_rate": 4.274765153095008e-06, "logits": -2.618225574493408, "logps": -192.17019653320312, "loss": 0.1677, "objective": 0.16235129535198212, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.8541666865348816, "regularize": 0.16235129535198212, "step": 285 }, { "dpo_loss": 0.5588306188583374, "epoch": 1.643835616438356, "grad_norm": 6.415461406420722, "learning_rate": 4.239497113483819e-06, "logits": -2.691134214401245, "logps": -191.76356506347656, "loss": 0.1678, "objective": 0.15764465928077698, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.8333333134651184, "regularize": 0.15764465928077698, "step": 290 }, { "dpo_loss": 0.5371195673942566, "epoch": 1.6721776098252243, "grad_norm": 6.197778580003095, "learning_rate": 4.203544848984729e-06, "logits": -2.665118455886841, "logps": -199.97247314453125, "loss": 0.1669, "objective": 0.15341004729270935, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8958333134651184, "regularize": 0.15341004729270935, "step": 295 }, { "dpo_loss": 0.5480075478553772, "epoch": 1.7005196032120926, "grad_norm": 6.138490802083033, "learning_rate": 4.16692250129073e-06, "logits": -2.7728724479675293, "logps": -199.02529907226562, "loss": 0.1717, "objective": 0.20036275684833527, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.875, "regularize": 0.20036275684833527, "step": 300 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6798518300056458, "eval_logits": -2.877673625946045, "eval_logps": -195.73040771484375, "eval_loss": 0.40205851197242737, "eval_objective": 0.40424150228500366, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5454545617103577, "eval_regularize": 0.40424150228500366, "eval_runtime": 259.7378, "eval_samples_per_second": 22.292, "eval_steps_per_second": 0.932, "step": 300 }, { "dpo_loss": 0.5607944130897522, "epoch": 1.7288615965989607, "grad_norm": 6.327660769011926, "learning_rate": 4.129644475669617e-06, "logits": -2.741549253463745, "logps": -191.5762481689453, "loss": 0.1638, "objective": 0.17037154734134674, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.8333333134651184, "regularize": 0.17037154734134674, "step": 305 }, { "dpo_loss": 0.5504526495933533, "epoch": 1.7572035899858292, "grad_norm": 6.7403750373805575, "learning_rate": 4.091725435297721e-06, "logits": -2.7614734172821045, "logps": -190.3129425048828, "loss": 0.1573, "objective": 0.1668892502784729, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.8500000238418579, "regularize": 0.1668892502784729, "step": 310 }, { "dpo_loss": 0.5544535517692566, "epoch": 1.7855455833726972, "grad_norm": 6.316947546186969, "learning_rate": 4.053180295492203e-06, "logits": -2.530224084854126, "logps": -189.7359161376953, "loss": 0.1665, "objective": 0.15285438299179077, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.8500000238418579, "regularize": 0.15285435318946838, "step": 315 }, { "dpo_loss": 0.5565517544746399, "epoch": 1.8138875767595655, "grad_norm": 6.951800105794237, "learning_rate": 4.014024217844167e-06, "logits": -2.596423864364624, "logps": -201.33631896972656, "loss": 0.1609, "objective": 0.1613713800907135, "ranking_idealized": 0.9041666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.8166666626930237, "regularize": 0.1613713800907135, "step": 320 }, { "dpo_loss": 0.5427613854408264, "epoch": 1.8422295701464337, "grad_norm": 7.385375003834793, "learning_rate": 3.974272604254906e-06, "logits": -2.746447801589966, "logps": -198.5028839111328, "loss": 0.1637, "objective": 0.15741844475269318, "ranking_idealized": 0.9583333134651184, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.8833333253860474, "regularize": 0.15741844475269318, "step": 325 }, { "dpo_loss": 0.5491302609443665, "epoch": 1.8705715635333018, "grad_norm": 7.03671844997743, "learning_rate": 3.933941090877615e-06, "logits": -2.5696513652801514, "logps": -197.8240203857422, "loss": 0.1572, "objective": 0.15931017696857452, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.8374999761581421, "regularize": 0.15931017696857452, "step": 330 }, { "dpo_loss": 0.5589691400527954, "epoch": 1.89891355692017, "grad_norm": 6.477813384915639, "learning_rate": 3.893045541966975e-06, "logits": -2.762031316757202, "logps": -203.58236694335938, "loss": 0.1535, "objective": 0.15087805688381195, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.8708333373069763, "regularize": 0.15087805688381195, "step": 335 }, { "dpo_loss": 0.5663090944290161, "epoch": 1.9272555503070383, "grad_norm": 6.018506322545624, "learning_rate": 3.8516020436389945e-06, "logits": -2.7401764392852783, "logps": -201.431884765625, "loss": 0.1465, "objective": 0.14212678372859955, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.8458333611488342, "regularize": 0.14212678372859955, "step": 340 }, { "dpo_loss": 0.5585800409317017, "epoch": 1.9555975436939064, "grad_norm": 6.624036944677984, "learning_rate": 3.8096268975436045e-06, "logits": -2.8644747734069824, "logps": -205.10971069335938, "loss": 0.1496, "objective": 0.15010811388492584, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.8500000238418579, "regularize": 0.15010811388492584, "step": 345 }, { "dpo_loss": 0.5542294383049011, "epoch": 1.9839395370807746, "grad_norm": 6.797447561538416, "learning_rate": 3.767136614452458e-06, "logits": -2.858165740966797, "logps": -216.2846221923828, "loss": 0.1527, "objective": 0.1548275649547577, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.862500011920929, "regularize": 0.1548275649547577, "step": 350 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6759648323059082, "eval_logits": -3.1101362705230713, "eval_logps": -211.60682678222656, "eval_loss": 0.39596185088157654, "eval_objective": 0.39695027470588684, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5557851195335388, "eval_regularize": 0.39695027470588684, "eval_runtime": 259.1655, "eval_samples_per_second": 22.341, "eval_steps_per_second": 0.934, "step": 350 }, { "dpo_loss": 0.5424126386642456, "epoch": 2.012281530467643, "grad_norm": 5.8063883271711685, "learning_rate": 3.724147907764478e-06, "logits": -2.7706944942474365, "logps": -205.6593780517578, "loss": 0.1484, "objective": 0.1335248053073883, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.8958333134651184, "regularize": 0.1335248053073883, "step": 355 }, { "dpo_loss": 0.5424516797065735, "epoch": 2.040623523854511, "grad_norm": 6.095590594476145, "learning_rate": 3.6806776869317074e-06, "logits": -2.8919057846069336, "logps": -198.0377655029297, "loss": 0.1296, "objective": 0.13360460102558136, "ranking_idealized": 0.9750000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.8958333134651184, "regularize": 0.13360460102558136, "step": 360 }, { "dpo_loss": 0.5373047590255737, "epoch": 2.0689655172413794, "grad_norm": 6.231030884897393, "learning_rate": 3.6367430508080283e-06, "logits": -3.063735008239746, "logps": -207.88970947265625, "loss": 0.1326, "objective": 0.12027280777692795, "ranking_idealized": 0.9083333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.8458333611488342, "regularize": 0.12027280777692795, "step": 365 }, { "dpo_loss": 0.5498053431510925, "epoch": 2.0973075106282475, "grad_norm": 6.774825067357434, "learning_rate": 3.5923612809233987e-06, "logits": -3.0324106216430664, "logps": -194.41429138183594, "loss": 0.1275, "objective": 0.12781473994255066, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.8541666865348816, "regularize": 0.12781472504138947, "step": 370 }, { "dpo_loss": 0.5362412333488464, "epoch": 2.1256495040151155, "grad_norm": 5.992693343450592, "learning_rate": 3.547549834686222e-06, "logits": -3.0772500038146973, "logps": -205.6930694580078, "loss": 0.1251, "objective": 0.12155468761920929, "ranking_idealized": 0.9750000238418579, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.9125000238418579, "regularize": 0.12155468761920929, "step": 375 }, { "dpo_loss": 0.5477665066719055, "epoch": 2.153991497401984, "grad_norm": 5.66117672582953, "learning_rate": 3.5023263385165346e-06, "logits": -2.971487522125244, "logps": -195.91168212890625, "loss": 0.1269, "objective": 0.11776351928710938, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.8833333253860474, "regularize": 0.11776351928710938, "step": 380 }, { "dpo_loss": 0.5528424382209778, "epoch": 2.182333490788852, "grad_norm": 5.673453112921881, "learning_rate": 3.4567085809127247e-06, "logits": -3.0804078578948975, "logps": -187.17169189453125, "loss": 0.1248, "objective": 0.1074480265378952, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.9166666865348816, "regularize": 0.1074480265378952, "step": 385 }, { "dpo_loss": 0.5393837094306946, "epoch": 2.21067548417572, "grad_norm": 6.024104465892304, "learning_rate": 3.410714505454486e-06, "logits": -2.998112916946411, "logps": -206.6367645263672, "loss": 0.1224, "objective": 0.11136513203382492, "ranking_idealized": 0.9083333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.8666666746139526, "regularize": 0.11136512458324432, "step": 390 }, { "dpo_loss": 0.5289559364318848, "epoch": 2.2390174775625886, "grad_norm": 6.100671720050322, "learning_rate": 3.364362203744777e-06, "logits": -3.014930009841919, "logps": -199.62350463867188, "loss": 0.1358, "objective": 0.1332855224609375, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.8999999761581421, "regularize": 0.1332855224609375, "step": 395 }, { "dpo_loss": 0.5282385945320129, "epoch": 2.2673594709494567, "grad_norm": 5.884120825175806, "learning_rate": 3.3176699082935546e-06, "logits": -3.104701042175293, "logps": -204.1650390625, "loss": 0.1267, "objective": 0.13824278116226196, "ranking_idealized": 0.9666666388511658, "ranking_idealized_expo": 0.6041666865348816, "ranking_simple": 0.9083333611488342, "regularize": 0.13824278116226196, "step": 400 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6775676608085632, "eval_logits": -3.2515387535095215, "eval_logps": -201.03680419921875, "eval_loss": 0.39813509583473206, "eval_objective": 0.39980101585388184, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5619834661483765, "eval_regularize": 0.39980101585388184, "eval_runtime": 259.6475, "eval_samples_per_second": 22.299, "eval_steps_per_second": 0.932, "step": 400 }, { "dpo_loss": 0.5318711400032043, "epoch": 2.295701464336325, "grad_norm": 5.891428026423688, "learning_rate": 3.2706559853460818e-06, "logits": -3.1382436752319336, "logps": -204.19851684570312, "loss": 0.1245, "objective": 0.12317010760307312, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.8999999761581421, "regularize": 0.12317009270191193, "step": 405 }, { "dpo_loss": 0.5330458879470825, "epoch": 2.324043457723193, "grad_norm": 5.883839309354464, "learning_rate": 3.2233389276586325e-06, "logits": -2.8399434089660645, "logps": -203.78355407714844, "loss": 0.1172, "objective": 0.11361113935709, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.862500011920929, "regularize": 0.1136111319065094, "step": 410 }, { "dpo_loss": 0.5357276797294617, "epoch": 2.3523854511100613, "grad_norm": 5.777709064523667, "learning_rate": 3.1757373472244324e-06, "logits": -2.7951467037200928, "logps": -203.05201721191406, "loss": 0.1182, "objective": 0.12953059375286102, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.9041666388511658, "regularize": 0.12953059375286102, "step": 415 }, { "dpo_loss": 0.5517702698707581, "epoch": 2.3807274444969297, "grad_norm": 5.823648414077854, "learning_rate": 3.127869967952698e-06, "logits": -2.653197765350342, "logps": -199.99070739746094, "loss": 0.124, "objective": 0.12403346598148346, "ranking_idealized": 0.9041666388511658, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.8333333134651184, "regularize": 0.12403346598148346, "step": 420 }, { "dpo_loss": 0.5411447286605835, "epoch": 2.409069437883798, "grad_norm": 5.406034966497648, "learning_rate": 3.0797556183036582e-06, "logits": -2.7264721393585205, "logps": -199.3270721435547, "loss": 0.1206, "objective": 0.11311660706996918, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.887499988079071, "regularize": 0.11311660706996918, "step": 425 }, { "dpo_loss": 0.5396248698234558, "epoch": 2.4374114312706663, "grad_norm": 5.630938578254106, "learning_rate": 3.0314132238824416e-06, "logits": -2.8288919925689697, "logps": -200.36387634277344, "loss": 0.1145, "objective": 0.10814479738473892, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.8916666507720947, "regularize": 0.10814479738473892, "step": 430 }, { "dpo_loss": 0.537260890007019, "epoch": 2.4657534246575343, "grad_norm": 5.382852925810034, "learning_rate": 2.9828617999947647e-06, "logits": -2.9378559589385986, "logps": -207.9824676513672, "loss": 0.1177, "objective": 0.1222720518708229, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.8833333253860474, "regularize": 0.1222720518708229, "step": 435 }, { "dpo_loss": 0.5335346460342407, "epoch": 2.4940954180444024, "grad_norm": 5.4087072313434, "learning_rate": 2.9341204441673267e-06, "logits": -2.8663976192474365, "logps": -201.47125244140625, "loss": 0.1171, "objective": 0.11767010390758514, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.887499988079071, "regularize": 0.11767010390758514, "step": 440 }, { "dpo_loss": 0.5356226563453674, "epoch": 2.5224374114312704, "grad_norm": 5.709598259442252, "learning_rate": 2.8852083286358647e-06, "logits": -2.838826894760132, "logps": -196.2780303955078, "loss": 0.1126, "objective": 0.10499007254838943, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.9041666388511658, "regularize": 0.10499005764722824, "step": 445 }, { "dpo_loss": 0.5633688569068909, "epoch": 2.550779404818139, "grad_norm": 5.3793816763659255, "learning_rate": 2.8361446928038298e-06, "logits": -2.838387966156006, "logps": -201.06651306152344, "loss": 0.1121, "objective": 0.11985477060079575, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8458333611488342, "regularize": 0.11985477060079575, "step": 450 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6782127022743225, "eval_logits": -2.9522619247436523, "eval_logps": -192.7809295654297, "eval_loss": 0.39569520950317383, "eval_objective": 0.3975852131843567, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5619834661483765, "eval_regularize": 0.3975852131843567, "eval_runtime": 258.9142, "eval_samples_per_second": 22.363, "eval_steps_per_second": 0.935, "step": 450 }, { "dpo_loss": 0.5279621481895447, "epoch": 2.579121398205007, "grad_norm": 5.392681143501708, "learning_rate": 2.7869488356746344e-06, "logits": -2.902580499649048, "logps": -197.00804138183594, "loss": 0.1147, "objective": 0.11110316216945648, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.9041666388511658, "regularize": 0.11110316216945648, "step": 455 }, { "dpo_loss": 0.5277626514434814, "epoch": 2.6074633915918755, "grad_norm": 5.315354427263376, "learning_rate": 2.7376401082604563e-06, "logits": -3.0961711406707764, "logps": -201.3343048095703, "loss": 0.1143, "objective": 0.11841437220573425, "ranking_idealized": 0.9624999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.9291666746139526, "regularize": 0.11841436475515366, "step": 460 }, { "dpo_loss": 0.5351486802101135, "epoch": 2.6358053849787435, "grad_norm": 5.411425844401666, "learning_rate": 2.6882379059705953e-06, "logits": -3.0071170330047607, "logps": -197.46665954589844, "loss": 0.1123, "objective": 0.11872568726539612, "ranking_idealized": 0.9166666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8833333253860474, "regularize": 0.11872567981481552, "step": 465 }, { "dpo_loss": 0.5352925658226013, "epoch": 2.6641473783656116, "grad_norm": 5.65268285367403, "learning_rate": 2.6387616609823506e-06, "logits": -3.0599467754364014, "logps": -206.38381958007812, "loss": 0.1029, "objective": 0.103180892765522, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.9166666865348816, "regularize": 0.103180892765522, "step": 470 }, { "dpo_loss": 0.5457909107208252, "epoch": 2.69248937175248, "grad_norm": 5.4108717659373395, "learning_rate": 2.5892308345974517e-06, "logits": -2.9345638751983643, "logps": -191.0044403076172, "loss": 0.1057, "objective": 0.1046978086233139, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.887499988079071, "regularize": 0.1046978086233139, "step": 475 }, { "dpo_loss": 0.5475970506668091, "epoch": 2.720831365139348, "grad_norm": 5.319894863881298, "learning_rate": 2.53966490958702e-06, "logits": -3.068021059036255, "logps": -189.0258026123047, "loss": 0.1101, "objective": 0.10782204568386078, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.8916666507720947, "regularize": 0.10782204568386078, "step": 480 }, { "dpo_loss": 0.537803590297699, "epoch": 2.7491733585262166, "grad_norm": 5.436329599180351, "learning_rate": 2.490083382528097e-06, "logits": -3.1013996601104736, "logps": -200.41253662109375, "loss": 0.1114, "objective": 0.11053992807865143, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.887499988079071, "regularize": 0.11053992807865143, "step": 485 }, { "dpo_loss": 0.5218387842178345, "epoch": 2.7775153519130846, "grad_norm": 5.475491663367257, "learning_rate": 2.440505756134732e-06, "logits": -3.1719369888305664, "logps": -201.2511444091797, "loss": 0.1091, "objective": 0.1095014289021492, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.9041666388511658, "regularize": 0.1095014289021492, "step": 490 }, { "dpo_loss": 0.5326829552650452, "epoch": 2.8058573452999527, "grad_norm": 5.099192097500262, "learning_rate": 2.3909515315866606e-06, "logits": -3.1574199199676514, "logps": -198.6073760986328, "loss": 0.1017, "objective": 0.09904598444700241, "ranking_idealized": 0.9166666865348816, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.887499988079071, "regularize": 0.09904597699642181, "step": 495 }, { "dpo_loss": 0.5235782861709595, "epoch": 2.8341993386868207, "grad_norm": 5.2339920064616186, "learning_rate": 2.341440200858589e-06, "logits": -3.177107810974121, "logps": -198.19998168945312, "loss": 0.1063, "objective": 0.10962475836277008, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.9125000238418579, "regularize": 0.10962474346160889, "step": 500 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6759718656539917, "eval_logits": -3.2835006713867188, "eval_logps": -195.79200744628906, "eval_loss": 0.39410659670829773, "eval_objective": 0.3949359357357025, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.567148745059967, "eval_regularize": 0.3949359357357025, "eval_runtime": 259.0636, "eval_samples_per_second": 22.35, "eval_steps_per_second": 0.934, "step": 500 }, { "dpo_loss": 0.5304385423660278, "epoch": 2.862541332073689, "grad_norm": 5.373601466021835, "learning_rate": 2.2919912390530945e-06, "logits": -3.0923917293548584, "logps": -199.0631866455078, "loss": 0.102, "objective": 0.10511735081672668, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.9083333611488342, "regularize": 0.10511735081672668, "step": 505 }, { "dpo_loss": 0.5375287532806396, "epoch": 2.8908833254605573, "grad_norm": 6.218818504345056, "learning_rate": 2.242624096740164e-06, "logits": -3.0648419857025146, "logps": -198.03871154785156, "loss": 0.1009, "objective": 0.09942923486232758, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.8916666507720947, "regularize": 0.09942923486232758, "step": 510 }, { "dpo_loss": 0.5454570651054382, "epoch": 2.9192253188474258, "grad_norm": 5.360667892296426, "learning_rate": 2.193358192306384e-06, "logits": -3.177243709564209, "logps": -192.1931610107422, "loss": 0.1015, "objective": 0.1067223846912384, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.887499988079071, "regularize": 0.1067223846912384, "step": 515 }, { "dpo_loss": 0.5447422862052917, "epoch": 2.947567312234294, "grad_norm": 5.250252460609212, "learning_rate": 2.1442129043167877e-06, "logits": -2.988645076751709, "logps": -196.80099487304688, "loss": 0.0957, "objective": 0.09414150565862656, "ranking_idealized": 0.9083333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.8833333253860474, "regularize": 0.09414150565862656, "step": 520 }, { "dpo_loss": 0.5430293679237366, "epoch": 2.975909305621162, "grad_norm": 5.566335571750069, "learning_rate": 2.0952075638923656e-06, "logits": -2.975144863128662, "logps": -196.40476989746094, "loss": 0.0946, "objective": 0.10850825160741806, "ranking_idealized": 0.8999999761581421, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.8666666746139526, "regularize": 0.10850825160741806, "step": 525 }, { "dpo_loss": 0.5192977786064148, "epoch": 3.0042512990080303, "grad_norm": 5.207616334659725, "learning_rate": 2.046361447106244e-06, "logits": -2.9917781352996826, "logps": -199.7664031982422, "loss": 0.099, "objective": 0.09743621945381165, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.925000011920929, "regularize": 0.09743621200323105, "step": 530 }, { "dpo_loss": 0.5329768061637878, "epoch": 3.0325932923948984, "grad_norm": 5.005538343407122, "learning_rate": 1.997693767401503e-06, "logits": -3.075000286102295, "logps": -204.2377166748047, "loss": 0.0882, "objective": 0.08230598270893097, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.9083333611488342, "regularize": 0.08230597525835037, "step": 535 }, { "dpo_loss": 0.538392961025238, "epoch": 3.0609352857817664, "grad_norm": 4.866997353798794, "learning_rate": 1.9492236680336486e-06, "logits": -3.1421122550964355, "logps": -189.7461700439453, "loss": 0.0842, "objective": 0.07560276240110397, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.9083333611488342, "regularize": 0.07560275495052338, "step": 540 }, { "dpo_loss": 0.5382309556007385, "epoch": 3.089277279168635, "grad_norm": 5.075388727380349, "learning_rate": 1.9009702145406728e-06, "logits": -3.102593421936035, "logps": -204.09027099609375, "loss": 0.0817, "objective": 0.08249451220035553, "ranking_idealized": 0.9041666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8833333253860474, "regularize": 0.08249450474977493, "step": 545 }, { "dpo_loss": 0.5309434533119202, "epoch": 3.117619272555503, "grad_norm": 4.919742025901051, "learning_rate": 1.852952387243698e-06, "logits": -2.887840747833252, "logps": -203.031982421875, "loss": 0.0891, "objective": 0.0902470126748085, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.9375, "regularize": 0.0902470126748085, "step": 550 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 0.6777102947235107, "eval_logits": -3.195338249206543, "eval_logps": -196.1659393310547, "eval_loss": 0.39561545848846436, "eval_objective": 0.39604148268699646, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5609503984451294, "eval_regularize": 0.39604148268699646, "eval_runtime": 259.2511, "eval_samples_per_second": 22.334, "eval_steps_per_second": 0.933, "step": 550 }, { "dpo_loss": 0.5161585211753845, "epoch": 3.1459612659423715, "grad_norm": 5.02329657218416, "learning_rate": 1.8051890737811395e-06, "logits": -3.032655954360962, "logps": -205.76190185546875, "loss": 0.0842, "objective": 0.08457961678504944, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.949999988079071, "regularize": 0.08457960933446884, "step": 555 }, { "dpo_loss": 0.5269332528114319, "epoch": 3.1743032593292395, "grad_norm": 5.022767770754425, "learning_rate": 1.7576990616793139e-06, "logits": -3.001573324203491, "logps": -205.6256561279297, "loss": 0.0777, "objective": 0.07936005294322968, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.9041666388511658, "regularize": 0.07936005294322968, "step": 560 }, { "dpo_loss": 0.5309363603591919, "epoch": 3.2026452527161076, "grad_norm": 5.117713576028772, "learning_rate": 1.7105010309624381e-06, "logits": -3.0092759132385254, "logps": -198.69540405273438, "loss": 0.0791, "objective": 0.07759826630353928, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.8999999761581421, "regularize": 0.07759825885295868, "step": 565 }, { "dpo_loss": 0.5266720652580261, "epoch": 3.230987246102976, "grad_norm": 4.836758469100523, "learning_rate": 1.6636135468049122e-06, "logits": -2.9470977783203125, "logps": -202.33779907226562, "loss": 0.0828, "objective": 0.08297502994537354, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.9041666388511658, "regularize": 0.08297502249479294, "step": 570 }, { "dpo_loss": 0.5284319519996643, "epoch": 3.259329239489844, "grad_norm": 5.27684795086492, "learning_rate": 1.617055052228768e-06, "logits": -3.067121744155884, "logps": -201.16802978515625, "loss": 0.0794, "objective": 0.08327650278806686, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.8999999761581421, "regularize": 0.08327650278806686, "step": 575 }, { "dpo_loss": 0.5467706918716431, "epoch": 3.287671232876712, "grad_norm": 4.999181466831561, "learning_rate": 1.5708438608491816e-06, "logits": -3.0922207832336426, "logps": -200.73341369628906, "loss": 0.0809, "objective": 0.08547008782625198, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.887499988079071, "regularize": 0.08547007292509079, "step": 580 }, { "dpo_loss": 0.5353319644927979, "epoch": 3.3160132262635806, "grad_norm": 4.7454466484307485, "learning_rate": 1.524998149670871e-06, "logits": -3.148766040802002, "logps": -199.28677368164062, "loss": 0.0767, "objective": 0.06919746100902557, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.887499988079071, "regularize": 0.06919745355844498, "step": 585 }, { "dpo_loss": 0.5416805148124695, "epoch": 3.3443552196504487, "grad_norm": 4.994244494597657, "learning_rate": 1.479535951938243e-06, "logits": -3.191918134689331, "logps": -201.75802612304688, "loss": 0.0757, "objective": 0.06989765167236328, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.8958333134651184, "regularize": 0.06989765167236328, "step": 590 }, { "dpo_loss": 0.5297635197639465, "epoch": 3.372697213037317, "grad_norm": 5.147488031975634, "learning_rate": 1.43447515004208e-06, "logits": -3.0706212520599365, "logps": -200.92311096191406, "loss": 0.0775, "objective": 0.0749397724866867, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.9166666865348816, "regularize": 0.0749397724866867, "step": 595 }, { "dpo_loss": 0.511430561542511, "epoch": 3.4010392064241852, "grad_norm": 4.864631411349059, "learning_rate": 1.3898334684855647e-06, "logits": -3.051577091217041, "logps": -202.49258422851562, "loss": 0.0749, "objective": 0.07237013429403305, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.9375, "regularize": 0.07237013429403305, "step": 600 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 0.6780565977096558, "eval_logits": -3.1966371536254883, "eval_logps": -194.1237335205078, "eval_loss": 0.3962407112121582, "eval_objective": 0.39730900526046753, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5743801593780518, "eval_regularize": 0.39730900526046753, "eval_runtime": 258.6121, "eval_samples_per_second": 22.389, "eval_steps_per_second": 0.936, "step": 600 }, { "dpo_loss": 0.5354328155517578, "epoch": 3.4293811998110533, "grad_norm": 5.220508290696569, "learning_rate": 1.3456284669124159e-06, "logits": -3.0896830558776855, "logps": -204.61468505859375, "loss": 0.0733, "objective": 0.06684383749961853, "ranking_idealized": 0.9166666865348816, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.9083333611488342, "regularize": 0.06684383004903793, "step": 605 }, { "dpo_loss": 0.5366904139518738, "epoch": 3.4577231931979218, "grad_norm": 4.948012773738948, "learning_rate": 1.301877533199859e-06, "logits": -3.0734212398529053, "logps": -203.69866943359375, "loss": 0.0729, "objective": 0.06990881264209747, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.8958333134651184, "regularize": 0.06990881264209747, "step": 610 }, { "dpo_loss": 0.5279187560081482, "epoch": 3.48606518658479, "grad_norm": 5.16746605562179, "learning_rate": 1.2585978766191726e-06, "logits": -3.0537939071655273, "logps": -202.3527374267578, "loss": 0.0737, "objective": 0.07423458993434906, "ranking_idealized": 0.9083333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.875, "regularize": 0.07423458248376846, "step": 615 }, { "dpo_loss": 0.5187819004058838, "epoch": 3.514407179971658, "grad_norm": 4.801574670976317, "learning_rate": 1.2158065210664848e-06, "logits": -2.913203001022339, "logps": -204.0981903076172, "loss": 0.0707, "objective": 0.06695393472909927, "ranking_idealized": 0.8999999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8999999761581421, "regularize": 0.06695392727851868, "step": 620 }, { "dpo_loss": 0.5320748686790466, "epoch": 3.5427491733585263, "grad_norm": 4.901983573451279, "learning_rate": 1.1735202983664803e-06, "logits": -3.018667459487915, "logps": -197.7013397216797, "loss": 0.0682, "objective": 0.06922433525323868, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.9333333373069763, "regularize": 0.06922433525323868, "step": 625 }, { "dpo_loss": 0.5438559651374817, "epoch": 3.5710911667453944, "grad_norm": 4.916430683667445, "learning_rate": 1.1317558416516696e-06, "logits": -3.063880681991577, "logps": -198.75747680664062, "loss": 0.0704, "objective": 0.07144972681999207, "ranking_idealized": 0.9125000238418579, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.9125000238418579, "regularize": 0.07144972681999207, "step": 630 }, { "dpo_loss": 0.5314496159553528, "epoch": 3.5994331601322624, "grad_norm": 4.767769596097874, "learning_rate": 1.0905295788197993e-06, "logits": -3.036067247390747, "logps": -196.23695373535156, "loss": 0.0647, "objective": 0.05931680276989937, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.9333333373069763, "regularize": 0.05931679159402847, "step": 635 }, { "dpo_loss": 0.5109093189239502, "epoch": 3.627775153519131, "grad_norm": 4.841523497854697, "learning_rate": 1.049857726072005e-06, "logits": -3.0281994342803955, "logps": -211.7641143798828, "loss": 0.0648, "objective": 0.06795307993888855, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.9125000238418579, "regularize": 0.06795307248830795, "step": 640 }, { "dpo_loss": 0.5220555067062378, "epoch": 3.656117146905999, "grad_norm": 4.756494572923249, "learning_rate": 1.0097562815342215e-06, "logits": -3.0403778553009033, "logps": -197.11727905273438, "loss": 0.0696, "objective": 0.07045839726924896, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.8999999761581421, "regularize": 0.07045838981866837, "step": 645 }, { "dpo_loss": 0.5448586344718933, "epoch": 3.6844591402928675, "grad_norm": 4.78589420445802, "learning_rate": 9.702410189643838e-07, "logits": -3.0378682613372803, "logps": -199.30670166015625, "loss": 0.062, "objective": 0.06362789124250412, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.9208333492279053, "regularize": 0.06362788379192352, "step": 650 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 0.6778165698051453, "eval_logits": -3.241185188293457, "eval_logps": -195.3243865966797, "eval_loss": 0.39558976888656616, "eval_objective": 0.3966863453388214, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5702479481697083, "eval_regularize": 0.3966863453388214, "eval_runtime": 259.3281, "eval_samples_per_second": 22.327, "eval_steps_per_second": 0.933, "step": 650 }, { "dpo_loss": 0.5378040671348572, "epoch": 3.7128011336797355, "grad_norm": 4.991430717748734, "learning_rate": 9.313274815478698e-07, "logits": -3.06374454498291, "logps": -206.94361877441406, "loss": 0.0675, "objective": 0.07482859492301941, "ranking_idealized": 0.9166666865348816, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.8833333253860474, "regularize": 0.07482858747243881, "step": 655 }, { "dpo_loss": 0.533437192440033, "epoch": 3.7411431270666036, "grad_norm": 4.914085991583788, "learning_rate": 8.930309757836517e-07, "logits": -3.1829257011413574, "logps": -198.15655517578125, "loss": 0.0645, "objective": 0.0638025775551796, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.9458333253860474, "regularize": 0.0638025775551796, "step": 660 }, { "dpo_loss": 0.5339053869247437, "epoch": 3.769485120453472, "grad_norm": 4.883348560718917, "learning_rate": 8.553665654635343e-07, "logits": -3.0380542278289795, "logps": -192.95997619628906, "loss": 0.0651, "objective": 0.06292819231748581, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.9125000238418579, "regularize": 0.06292817741632462, "step": 665 }, { "dpo_loss": 0.5281752943992615, "epoch": 3.79782711384034, "grad_norm": 5.904298344985475, "learning_rate": 8.183490657468687e-07, "logits": -3.175488233566284, "logps": -201.79714965820312, "loss": 0.0635, "objective": 0.06745120882987976, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.612500011920929, "ranking_simple": 0.9125000238418579, "regularize": 0.06745120882987976, "step": 670 }, { "dpo_loss": 0.535234808921814, "epoch": 3.826169107227208, "grad_norm": 4.6497906673921685, "learning_rate": 7.819930373330669e-07, "logits": -3.079956531524658, "logps": -195.46868896484375, "loss": 0.0606, "objective": 0.05796652287244797, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.9041666388511658, "regularize": 0.05796651914715767, "step": 675 }, { "dpo_loss": 0.5492002964019775, "epoch": 3.8545111006140766, "grad_norm": 5.184144239252589, "learning_rate": 7.463127807341966e-07, "logits": -3.021759033203125, "logps": -195.5998992919922, "loss": 0.0607, "objective": 0.0610785037279129, "ranking_idealized": 0.9083333611488342, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.862500011920929, "regularize": 0.06107849255204201, "step": 680 }, { "dpo_loss": 0.5241864323616028, "epoch": 3.8828530940009447, "grad_norm": 5.005865678419639, "learning_rate": 7.113223306499336e-07, "logits": -3.1358683109283447, "logps": -201.37371826171875, "loss": 0.0629, "objective": 0.06049242988228798, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.9125000238418579, "regularize": 0.06049241125583649, "step": 685 }, { "dpo_loss": 0.5277370810508728, "epoch": 3.9111950873878127, "grad_norm": 4.853091570155952, "learning_rate": 6.770354504470575e-07, "logits": -3.0913193225860596, "logps": -194.60806274414062, "loss": 0.0576, "objective": 0.05857709422707558, "ranking_idealized": 0.9541666507720947, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.9333333373069763, "regularize": 0.05857709422707558, "step": 690 }, { "dpo_loss": 0.5350156426429749, "epoch": 3.9395370807746812, "grad_norm": 4.643145060858906, "learning_rate": 6.434656267456843e-07, "logits": -3.007568836212158, "logps": -196.4861297607422, "loss": 0.062, "objective": 0.06293628364801407, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.9166666865348816, "regularize": 0.06293627619743347, "step": 695 }, { "dpo_loss": 0.537192165851593, "epoch": 3.9678790741615493, "grad_norm": 4.752148657093376, "learning_rate": 6.106260641143547e-07, "logits": -3.088932991027832, "logps": -200.46910095214844, "loss": 0.0583, "objective": 0.06041649729013443, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.9375, "regularize": 0.06041649356484413, "step": 700 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 0.677208423614502, "eval_logits": -3.2432026863098145, "eval_logps": -196.44686889648438, "eval_loss": 0.3955562114715576, "eval_objective": 0.3960891366004944, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.5640496015548706, "eval_regularize": 0.3960891366004944, "eval_runtime": 258.7136, "eval_samples_per_second": 22.38, "eval_steps_per_second": 0.935, "step": 700 }, { "dpo_loss": 0.5246204733848572, "epoch": 3.9962210675484178, "grad_norm": 4.771032845540052, "learning_rate": 5.785296798760601e-07, "logits": -3.013643980026245, "logps": -202.21218872070312, "loss": 0.0575, "objective": 0.04958561435341835, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.9208333492279053, "regularize": 0.049585599452257156, "step": 705 }, { "dpo_loss": 0.5198561549186707, "epoch": 4.024563060935286, "grad_norm": 4.697507600765225, "learning_rate": 5.471890990272666e-07, "logits": -3.1067426204681396, "logps": -207.82223510742188, "loss": 0.0497, "objective": 0.054764509201049805, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.9166666865348816, "regularize": 0.05476450175046921, "step": 710 }, { "dpo_loss": 0.5302870869636536, "epoch": 4.052905054322154, "grad_norm": 4.631394448060559, "learning_rate": 5.166166492719124e-07, "logits": -3.0587379932403564, "logps": -204.6709442138672, "loss": 0.0462, "objective": 0.044012073427438736, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.9125000238418579, "regularize": 0.04401206597685814, "step": 715 }, { "dpo_loss": 0.5232208371162415, "epoch": 4.081247047709022, "grad_norm": 4.929724097820593, "learning_rate": 4.868243561723535e-07, "logits": -2.9354002475738525, "logps": -205.88121032714844, "loss": 0.0468, "objective": 0.053409043699502945, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.925000011920929, "regularize": 0.05340903252363205, "step": 720 }, { "dpo_loss": 0.5423314571380615, "epoch": 4.109589041095891, "grad_norm": 4.739841142104876, "learning_rate": 4.57823938419153e-07, "logits": -3.0645394325256348, "logps": -200.61724853515625, "loss": 0.0479, "objective": 0.04531220719218254, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.9208333492279053, "regularize": 0.045312199741601944, "step": 725 }, { "dpo_loss": 0.5259865522384644, "epoch": 4.137931034482759, "grad_norm": 4.6973617362423665, "learning_rate": 4.2962680322157335e-07, "logits": -3.1625542640686035, "logps": -201.07965087890625, "loss": 0.0486, "objective": 0.049515120685100555, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.887499988079071, "regularize": 0.04951511323451996, "step": 730 }, { "dpo_loss": 0.5347721576690674, "epoch": 4.166273027869627, "grad_norm": 4.718934829997983, "learning_rate": 4.0224404182059443e-07, "logits": -3.0613696575164795, "logps": -204.30772399902344, "loss": 0.0439, "objective": 0.04426734894514084, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.949999988079071, "regularize": 0.04426734521985054, "step": 735 }, { "dpo_loss": 0.5107501149177551, "epoch": 4.194615021256495, "grad_norm": 4.7379442831945635, "learning_rate": 3.756864251262143e-07, "logits": -3.04003643989563, "logps": -202.8253631591797, "loss": 0.0459, "objective": 0.04314772039651871, "ranking_idealized": 0.9583333134651184, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.949999988079071, "regularize": 0.043147701770067215, "step": 740 }, { "dpo_loss": 0.5249863266944885, "epoch": 4.222957014643363, "grad_norm": 4.71988199553104, "learning_rate": 3.499643994807486e-07, "logits": -3.1296160221099854, "logps": -198.77182006835938, "loss": 0.046, "objective": 0.045759402215480804, "ranking_idealized": 0.9166666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8958333134651184, "regularize": 0.04575938731431961, "step": 745 }, { "dpo_loss": 0.522843599319458, "epoch": 4.251299008030231, "grad_norm": 4.672227955095767, "learning_rate": 3.250880825498026e-07, "logits": -3.2786660194396973, "logps": -199.6768035888672, "loss": 0.0451, "objective": 0.049515463411808014, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.9166666865348816, "regularize": 0.04951544478535652, "step": 750 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 0.6771067380905151, "eval_logits": -3.266589403152466, "eval_logps": -195.43978881835938, "eval_loss": 0.39519038796424866, "eval_objective": 0.3954727351665497, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.567148745059967, "eval_regularize": 0.3954727351665497, "eval_runtime": 258.4936, "eval_samples_per_second": 22.399, "eval_steps_per_second": 0.936, "step": 750 }, { "dpo_loss": 0.528011679649353, "epoch": 4.2796410014171, "grad_norm": 5.004941031227222, "learning_rate": 3.0106725934252095e-07, "logits": -3.2007675170898438, "logps": -196.980224609375, "loss": 0.0469, "objective": 0.052510153502225876, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.9166666865348816, "regularize": 0.05251014232635498, "step": 755 }, { "dpo_loss": 0.5268819332122803, "epoch": 4.307982994803968, "grad_norm": 4.8743952598559135, "learning_rate": 2.779113783626916e-07, "logits": -3.166001796722412, "logps": -206.85211181640625, "loss": 0.0436, "objective": 0.04319094866514206, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.9291666746139526, "regularize": 0.04319094493985176, "step": 760 }, { "dpo_loss": 0.5119529962539673, "epoch": 4.336324988190836, "grad_norm": 4.7936404352453845, "learning_rate": 2.5562954789221164e-07, "logits": -3.224353790283203, "logps": -204.93324279785156, "loss": 0.0447, "objective": 0.04502396285533905, "ranking_idealized": 0.9624999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.9583333134651184, "regularize": 0.04502394422888756, "step": 765 }, { "dpo_loss": 0.5143262147903442, "epoch": 4.364666981577704, "grad_norm": 4.845346036030975, "learning_rate": 2.3423053240837518e-07, "logits": -3.086646318435669, "logps": -200.40354919433594, "loss": 0.0447, "objective": 0.04372342303395271, "ranking_idealized": 0.9666666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.9708333611488342, "regularize": 0.043723396956920624, "step": 770 }, { "dpo_loss": 0.5219811797142029, "epoch": 4.393008974964572, "grad_norm": 4.882318635829277, "learning_rate": 2.137227491364016e-07, "logits": -3.1227707862854004, "logps": -202.45298767089844, "loss": 0.0431, "objective": 0.042134013026952744, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.9125000238418579, "regularize": 0.04213400185108185, "step": 775 }, { "dpo_loss": 0.5324522852897644, "epoch": 4.42135096835144, "grad_norm": 5.178173799388018, "learning_rate": 1.941142647385469e-07, "logits": -3.1812171936035156, "logps": -196.50355529785156, "loss": 0.0404, "objective": 0.039291638880968094, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.925000011920929, "regularize": 0.0392916202545166, "step": 780 }, { "dpo_loss": 0.543372392654419, "epoch": 4.449692961738309, "grad_norm": 4.563712486054605, "learning_rate": 1.7541279214111277e-07, "logits": -3.204663038253784, "logps": -198.68594360351562, "loss": 0.0495, "objective": 0.05586666613817215, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.9291666746139526, "regularize": 0.05586665868759155, "step": 785 }, { "dpo_loss": 0.5179670453071594, "epoch": 4.478034955125177, "grad_norm": 4.962116136464137, "learning_rate": 1.5762568750059604e-07, "logits": -3.1283469200134277, "logps": -203.68194580078125, "loss": 0.0426, "objective": 0.04570373520255089, "ranking_idealized": 0.949999988079071, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.9541666507720947, "regularize": 0.04570373147726059, "step": 790 }, { "dpo_loss": 0.525623083114624, "epoch": 4.506376948512045, "grad_norm": 4.808999753603184, "learning_rate": 1.4075994731016895e-07, "logits": -3.018510580062866, "logps": -205.88327026367188, "loss": 0.0435, "objective": 0.036049842834472656, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.925000011920929, "regularize": 0.03604983165860176, "step": 795 }, { "dpo_loss": 0.5233331918716431, "epoch": 4.534718941898913, "grad_norm": 4.734460423236949, "learning_rate": 1.2482220564763669e-07, "logits": -3.0628395080566406, "logps": -201.49278259277344, "loss": 0.0438, "objective": 0.04488484933972359, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.925000011920929, "regularize": 0.04488483443856239, "step": 800 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 0.6771342158317566, "eval_logits": -3.2692906856536865, "eval_logps": -195.2318878173828, "eval_loss": 0.39515408873558044, "eval_objective": 0.39557480812072754, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.56611567735672, "eval_regularize": 0.39557480812072754, "eval_runtime": 258.9479, "eval_samples_per_second": 22.36, "eval_steps_per_second": 0.935, "step": 800 }, { "dpo_loss": 0.5237378478050232, "epoch": 4.563060935285781, "grad_norm": 4.695279873765779, "learning_rate": 1.0981873156594381e-07, "logits": -3.0811893939971924, "logps": -198.17877197265625, "loss": 0.0405, "objective": 0.04346688091754913, "ranking_idealized": 0.8999999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.8958333134651184, "regularize": 0.043466873466968536, "step": 805 }, { "dpo_loss": 0.5305168032646179, "epoch": 4.59140292867265, "grad_norm": 4.641921422533774, "learning_rate": 9.575542662726756e-08, "logits": -3.176128387451172, "logps": -197.74447631835938, "loss": 0.0422, "objective": 0.036040760576725006, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.9166666865348816, "regularize": 0.03604074567556381, "step": 810 }, { "dpo_loss": 0.5344857573509216, "epoch": 4.619744922059518, "grad_norm": 4.571244888107197, "learning_rate": 8.26378225816582e-08, "logits": -3.02875018119812, "logps": -193.68545532226562, "loss": 0.0411, "objective": 0.03461510315537453, "ranking_idealized": 0.9416666626930237, "ranking_idealized_expo": 0.6333333253860474, "ranking_simple": 0.925000011920929, "regularize": 0.03461508825421333, "step": 815 }, { "dpo_loss": 0.5378891825675964, "epoch": 4.648086915446386, "grad_norm": 4.696564500526644, "learning_rate": 7.047107919114588e-08, "logits": -3.148911952972412, "logps": -204.34703063964844, "loss": 0.0412, "objective": 0.040915556252002716, "ranking_idealized": 0.9333333373069763, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.925000011920929, "regularize": 0.04091554507613182, "step": 820 }, { "dpo_loss": 0.5378555059432983, "epoch": 4.6764289088332545, "grad_norm": 4.6443623208845795, "learning_rate": 5.92599822001666e-08, "logits": -3.0313339233398438, "logps": -200.1685028076172, "loss": 0.0388, "objective": 0.034796856343746185, "ranking_idealized": 0.9125000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8999999761581421, "regularize": 0.03479684516787529, "step": 825 }, { "dpo_loss": 0.5235874056816101, "epoch": 4.7047709022201225, "grad_norm": 4.667685116333195, "learning_rate": 4.9008941453107527e-08, "logits": -3.2229866981506348, "logps": -199.17506408691406, "loss": 0.043, "objective": 0.04358634725213051, "ranking_idealized": 0.925000011920929, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.9166666865348816, "regularize": 0.043586332350969315, "step": 830 }, { "dpo_loss": 0.5147577524185181, "epoch": 4.733112895606991, "grad_norm": 4.858854755852941, "learning_rate": 3.972198915970976e-08, "logits": -3.1338717937469482, "logps": -205.56285095214844, "loss": 0.0391, "objective": 0.04283083602786064, "ranking_idealized": 0.9291666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.8999999761581421, "regularize": 0.04283082112669945, "step": 835 }, { "dpo_loss": 0.5037484765052795, "epoch": 4.7614548889938595, "grad_norm": 4.877052385921156, "learning_rate": 3.1402778309014284e-08, "logits": -3.141592502593994, "logps": -206.25045776367188, "loss": 0.0442, "objective": 0.04478234797716141, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.9375, "regularize": 0.04478234425187111, "step": 840 }, { "dpo_loss": 0.5356060266494751, "epoch": 4.7897968823807275, "grad_norm": 4.719985877621544, "learning_rate": 2.4054581232470785e-08, "logits": -3.155550241470337, "logps": -196.71856689453125, "loss": 0.0404, "objective": 0.037482328712940216, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.9166666865348816, "regularize": 0.03748232498764992, "step": 845 }, { "dpo_loss": 0.5255146026611328, "epoch": 4.818138875767596, "grad_norm": 5.213474190658553, "learning_rate": 1.768028831677926e-08, "logits": -3.0956904888153076, "logps": -199.34555053710938, "loss": 0.0408, "objective": 0.03515857085585594, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.9125000238418579, "regularize": 0.03515855222940445, "step": 850 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 0.6770739555358887, "eval_logits": -3.2704193592071533, "eval_logps": -195.50953674316406, "eval_loss": 0.3951335549354553, "eval_objective": 0.395561158657074, "eval_ranking_idealized": 0.9194214940071106, "eval_ranking_idealized_expo": 0.5309917330741882, "eval_ranking_simple": 0.56611567735672, "eval_regularize": 0.395561158657074, "eval_runtime": 259.1953, "eval_samples_per_second": 22.338, "eval_steps_per_second": 0.934, "step": 850 }, { "dpo_loss": 0.5365945100784302, "epoch": 4.846480869154464, "grad_norm": 4.6357618577755, "learning_rate": 1.2282406866966078e-08, "logits": -3.0836923122406006, "logps": -202.77923583984375, "loss": 0.0369, "objective": 0.03312551975250244, "ranking_idealized": 0.9583333134651184, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.9375, "regularize": 0.033125489950180054, "step": 855 }, { "dpo_loss": 0.5177373290061951, "epoch": 4.874822862541333, "grad_norm": 5.087309484062101, "learning_rate": 7.863060120144316e-09, "logits": -3.0513181686401367, "logps": -197.80010986328125, "loss": 0.0392, "objective": 0.04462633281946182, "ranking_idealized": 0.9125000238418579, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.9208333492279053, "regularize": 0.04462629556655884, "step": 860 }, { "dpo_loss": 0.5258888006210327, "epoch": 4.903164855928201, "grad_norm": 4.542109406021276, "learning_rate": 4.423986410346526e-09, "logits": -3.1244866847991943, "logps": -196.3852081298828, "loss": 0.0396, "objective": 0.05519362911581993, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.9375, "regularize": 0.055193621665239334, "step": 865 }, { "dpo_loss": 0.5206807851791382, "epoch": 4.931506849315069, "grad_norm": 4.732937484587474, "learning_rate": 1.9665384847583622e-09, "logits": -3.1762211322784424, "logps": -202.62925720214844, "loss": 0.0378, "objective": 0.0369645431637764, "ranking_idealized": 0.9208333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.9166666865348816, "regularize": 0.0369645357131958, "step": 870 }, { "dpo_loss": 0.5267921686172485, "epoch": 4.959848842701937, "grad_norm": 4.9576694103498475, "learning_rate": 4.916829716183901e-10, "logits": -3.104861259460449, "logps": -203.89500427246094, "loss": 0.041, "objective": 0.03402137756347656, "ranking_idealized": 0.9375, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.9208333492279053, "regularize": 0.03402136638760567, "step": 875 }, { "dpo_loss": 0.5229869484901428, "epoch": 4.988190836088805, "grad_norm": 4.696769247024194, "learning_rate": 0.0, "logits": -3.1990513801574707, "logps": -196.47311401367188, "loss": 0.0352, "objective": 0.0322914645075798, "ranking_idealized": 0.9458333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.9458333253860474, "regularize": 0.03229145333170891, "step": 880 }, { "epoch": 4.988190836088805, "step": 880, "total_flos": 0.0, "train_loss": 0.1496292933313684, "train_runtime": 35141.4223, "train_samples_per_second": 7.228, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 880, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }