diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,2969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.988190836088805, + "eval_steps": 50, + "global_step": 880, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "dpo_loss": 0.6931471824645996, + "epoch": 0.005668398677373642, + "grad_norm": 1.3413641460666135, + "learning_rate": 5.681818181818182e-08, + "logits": -1.3147305250167847, + "logps": -88.0877456665039, + "loss": 0.4113, + "objective": 0.41588976979255676, + "ranking_idealized": 0.9791666865348816, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.5208333134651184, + "regularize": 0.41588976979255676, + "step": 1 + }, + { + "dpo_loss": 0.6931489109992981, + "epoch": 0.02834199338686821, + "grad_norm": 1.344017584578235, + "learning_rate": 2.840909090909091e-07, + "logits": -1.3680044412612915, + "logps": -84.2520523071289, + "loss": 0.4131, + "objective": 0.3755461275577545, + "ranking_idealized": 0.921875, + "ranking_idealized_expo": 0.5729166865348816, + "ranking_simple": 0.546875, + "regularize": 0.3755461275577545, + "step": 5 + }, + { + "dpo_loss": 0.6928147077560425, + "epoch": 0.05668398677373642, + "grad_norm": 1.297307695578835, + "learning_rate": 5.681818181818182e-07, + "logits": -1.447161078453064, + "logps": -82.31820678710938, + "loss": 0.4176, + "objective": 0.4424538016319275, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.5166666507720947, + "regularize": 0.4424538016319275, + "step": 10 + }, + { + "dpo_loss": 0.6928682923316956, + "epoch": 0.08502598016060463, + "grad_norm": 1.2486707608263468, + "learning_rate": 8.522727272727273e-07, + "logits": -1.4277892112731934, + "logps": -81.3590316772461, + "loss": 0.4254, + "objective": 0.41196563839912415, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.512499988079071, + "regularize": 0.41196563839912415, + "step": 15 + }, + { + "dpo_loss": 0.6925787329673767, + "epoch": 0.11336797354747284, + "grad_norm": 1.4692119961571695, + "learning_rate": 1.1363636363636364e-06, + "logits": -1.4481867551803589, + "logps": -81.8401870727539, + "loss": 0.4151, + "objective": 0.4033361077308655, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.5291666388511658, + "regularize": 0.4033361077308655, + "step": 20 + }, + { + "dpo_loss": 0.6910920739173889, + "epoch": 0.14170996693434104, + "grad_norm": 1.6122424998527856, + "learning_rate": 1.4204545454545458e-06, + "logits": -1.5582950115203857, + "logps": -82.6436538696289, + "loss": 0.4117, + "objective": 0.43133974075317383, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.5166666507720947, + "regularize": 0.43133974075317383, + "step": 25 + }, + { + "dpo_loss": 0.6899585723876953, + "epoch": 0.17005196032120926, + "grad_norm": 2.145868200166189, + "learning_rate": 1.7045454545454546e-06, + "logits": -1.6009422540664673, + "logps": -86.27643585205078, + "loss": 0.4119, + "objective": 0.4145013391971588, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.5666666626930237, + "regularize": 0.4145013391971588, + "step": 30 + }, + { + "dpo_loss": 0.6881429553031921, + "epoch": 0.19839395370807747, + "grad_norm": 3.250036749772235, + "learning_rate": 1.9886363636363638e-06, + "logits": -1.6226321458816528, + "logps": -95.08840942382812, + "loss": 0.408, + "objective": 0.3913627564907074, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.550000011920929, + "regularize": 0.3913627564907074, + "step": 35 + }, + { + "dpo_loss": 0.681670606136322, + "epoch": 0.22673594709494568, + "grad_norm": 2.7778046190059134, + "learning_rate": 2.2727272727272728e-06, + "logits": -1.6656767129898071, + "logps": -101.75907135009766, + "loss": 0.4132, + "objective": 0.42999422550201416, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.6041666865348816, + "regularize": 0.42999422550201416, + "step": 40 + }, + { + "dpo_loss": 0.6795368790626526, + "epoch": 0.25507794048181387, + "grad_norm": 3.073343526840778, + "learning_rate": 2.556818181818182e-06, + "logits": -1.7650772333145142, + "logps": -107.9706039428711, + "loss": 0.4172, + "objective": 0.46339866518974304, + "ranking_idealized": 0.9666666388511658, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.6083333492279053, + "regularize": 0.46339866518974304, + "step": 45 + }, + { + "dpo_loss": 0.6784433126449585, + "epoch": 0.2834199338686821, + "grad_norm": 4.230883583179675, + "learning_rate": 2.8409090909090916e-06, + "logits": -1.6497570276260376, + "logps": -111.7117919921875, + "loss": 0.4052, + "objective": 0.3878687024116516, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.44999998807907104, + "ranking_simple": 0.5666666626930237, + "regularize": 0.3878687024116516, + "step": 50 + }, + { + "epoch": 0.2834199338686821, + "eval_dpo_loss": 0.691393256187439, + "eval_logits": -1.8292194604873657, + "eval_logps": -129.08827209472656, + "eval_loss": 0.4106997549533844, + "eval_objective": 0.41201457381248474, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5371900796890259, + "eval_regularize": 0.41201457381248474, + "eval_runtime": 265.4611, + "eval_samples_per_second": 21.811, + "eval_steps_per_second": 0.912, + "step": 50 + }, + { + "dpo_loss": 0.6729306578636169, + "epoch": 0.3117619272555503, + "grad_norm": 5.170133309231958, + "learning_rate": 3.125e-06, + "logits": -1.7047711610794067, + "logps": -128.38836669921875, + "loss": 0.3945, + "objective": 0.39437106251716614, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.6208333373069763, + "regularize": 0.39437106251716614, + "step": 55 + }, + { + "dpo_loss": 0.673882246017456, + "epoch": 0.3401039206424185, + "grad_norm": 4.832516943098698, + "learning_rate": 3.409090909090909e-06, + "logits": -1.6675713062286377, + "logps": -128.96734619140625, + "loss": 0.3855, + "objective": 0.3761754035949707, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6041666865348816, + "regularize": 0.3761754035949707, + "step": 60 + }, + { + "dpo_loss": 0.669740617275238, + "epoch": 0.3684459140292867, + "grad_norm": 5.367256043496177, + "learning_rate": 3.6931818181818186e-06, + "logits": -1.5951703786849976, + "logps": -137.0664520263672, + "loss": 0.3701, + "objective": 0.36182090640068054, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.6000000238418579, + "regularize": 0.36182090640068054, + "step": 65 + }, + { + "dpo_loss": 0.6654062271118164, + "epoch": 0.39678790741615494, + "grad_norm": 4.94424534111878, + "learning_rate": 3.9772727272727275e-06, + "logits": -1.697352647781372, + "logps": -133.348388671875, + "loss": 0.3724, + "objective": 0.40955594182014465, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.637499988079071, + "regularize": 0.40955594182014465, + "step": 70 + }, + { + "dpo_loss": 0.6624744534492493, + "epoch": 0.42512990080302315, + "grad_norm": 5.966524918481801, + "learning_rate": 4.2613636363636365e-06, + "logits": -1.8367187976837158, + "logps": -136.5087432861328, + "loss": 0.3757, + "objective": 0.38774457573890686, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.625, + "regularize": 0.38774457573890686, + "step": 75 + }, + { + "dpo_loss": 0.6609200835227966, + "epoch": 0.45347189418989137, + "grad_norm": 6.280774873594145, + "learning_rate": 4.5454545454545455e-06, + "logits": -1.9799270629882812, + "logps": -158.3598175048828, + "loss": 0.356, + "objective": 0.364311546087265, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.6625000238418579, + "regularize": 0.364311546087265, + "step": 80 + }, + { + "dpo_loss": 0.6481165289878845, + "epoch": 0.4818138875767596, + "grad_norm": 6.596386501175196, + "learning_rate": 4.829545454545455e-06, + "logits": -2.1429412364959717, + "logps": -150.511474609375, + "loss": 0.3409, + "objective": 0.34856364130973816, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.6625000238418579, + "regularize": 0.34856364130973816, + "step": 85 + }, + { + "dpo_loss": 0.6467424035072327, + "epoch": 0.5101558809636277, + "grad_norm": 7.381865971758164, + "learning_rate": 4.999921328558333e-06, + "logits": -2.0832204818725586, + "logps": -174.22291564941406, + "loss": 0.3472, + "objective": 0.3462918698787689, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.6333333253860474, + "regularize": 0.3462918698787689, + "step": 90 + }, + { + "dpo_loss": 0.6253587007522583, + "epoch": 0.538497874350496, + "grad_norm": 8.119206476955762, + "learning_rate": 4.999036331701828e-06, + "logits": -2.2299115657806396, + "logps": -187.98475646972656, + "loss": 0.3267, + "objective": 0.33174222707748413, + "ranking_idealized": 0.9624999761581421, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.7291666865348816, + "regularize": 0.33174222707748413, + "step": 95 + }, + { + "dpo_loss": 0.6211538314819336, + "epoch": 0.5668398677373642, + "grad_norm": 6.956710000085001, + "learning_rate": 4.997168347957521e-06, + "logits": -2.3657114505767822, + "logps": -187.91744995117188, + "loss": 0.3407, + "objective": 0.3683268129825592, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.7083333134651184, + "regularize": 0.3683268129825592, + "step": 100 + }, + { + "epoch": 0.5668398677373642, + "eval_dpo_loss": 0.6838738322257996, + "eval_logits": -2.5065720081329346, + "eval_logps": -173.33192443847656, + "eval_loss": 0.40174734592437744, + "eval_objective": 0.40628084540367126, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5547520518302917, + "eval_regularize": 0.40628084540367126, + "eval_runtime": 259.1906, + "eval_samples_per_second": 22.339, + "eval_steps_per_second": 0.934, + "step": 100 + }, + { + "dpo_loss": 0.6136354804039001, + "epoch": 0.5951818611242324, + "grad_norm": 9.080339976174114, + "learning_rate": 4.994318112090048e-06, + "logits": -2.1985232830047607, + "logps": -186.09088134765625, + "loss": 0.3217, + "objective": 0.3403078019618988, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.737500011920929, + "regularize": 0.3403078019618988, + "step": 105 + }, + { + "dpo_loss": 0.6185809969902039, + "epoch": 0.6235238545111006, + "grad_norm": 7.791051933395558, + "learning_rate": 4.990486745229364e-06, + "logits": -2.484309434890747, + "logps": -186.34634399414062, + "loss": 0.3255, + "objective": 0.3505449593067169, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.699999988079071, + "regularize": 0.3505449593067169, + "step": 110 + }, + { + "dpo_loss": 0.6309658288955688, + "epoch": 0.6518658478979689, + "grad_norm": 6.754375219280332, + "learning_rate": 4.985675754429744e-06, + "logits": -2.47392201423645, + "logps": -166.2880859375, + "loss": 0.3035, + "objective": 0.2969822585582733, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.6916666626930237, + "regularize": 0.2969822585582733, + "step": 115 + }, + { + "dpo_loss": 0.6232146620750427, + "epoch": 0.680207841284837, + "grad_norm": 8.649064764793055, + "learning_rate": 4.9798870320769884e-06, + "logits": -2.4262490272521973, + "logps": -179.76458740234375, + "loss": 0.2997, + "objective": 0.27002623677253723, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.6916666626930237, + "regularize": 0.27002623677253723, + "step": 120 + }, + { + "dpo_loss": 0.6118648648262024, + "epoch": 0.7085498346717053, + "grad_norm": 8.68434746516712, + "learning_rate": 4.973122855144066e-06, + "logits": -2.3900842666625977, + "logps": -196.9748992919922, + "loss": 0.2995, + "objective": 0.2906176447868347, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.737500011920929, + "regularize": 0.2906176447868347, + "step": 125 + }, + { + "dpo_loss": 0.6124536991119385, + "epoch": 0.7368918280585735, + "grad_norm": 7.486248737537153, + "learning_rate": 4.965385884295467e-06, + "logits": -2.4602267742156982, + "logps": -182.41766357421875, + "loss": 0.2873, + "objective": 0.26530107855796814, + "ranking_idealized": 0.9166666865348816, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.7250000238418579, + "regularize": 0.26530107855796814, + "step": 130 + }, + { + "dpo_loss": 0.606670081615448, + "epoch": 0.7652338214454416, + "grad_norm": 7.285272647997681, + "learning_rate": 4.956679162840646e-06, + "logits": -2.281942844390869, + "logps": -177.3143768310547, + "loss": 0.269, + "objective": 0.270210325717926, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.7208333611488342, + "regularize": 0.270210325717926, + "step": 135 + }, + { + "dpo_loss": 0.6003846526145935, + "epoch": 0.7935758148323099, + "grad_norm": 7.413948788955954, + "learning_rate": 4.947006115536947e-06, + "logits": -2.1732773780822754, + "logps": -176.40997314453125, + "loss": 0.2664, + "objective": 0.24727170169353485, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.7666666507720947, + "regularize": 0.24727170169353485, + "step": 140 + }, + { + "dpo_loss": 0.6026275157928467, + "epoch": 0.821917808219178, + "grad_norm": 7.450961549840002, + "learning_rate": 4.9363705472424825e-06, + "logits": -2.2946832180023193, + "logps": -178.30978393554688, + "loss": 0.2707, + "objective": 0.2942873537540436, + "ranking_idealized": 0.9125000238418579, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.7250000238418579, + "regularize": 0.2942873537540436, + "step": 145 + }, + { + "dpo_loss": 0.6038042306900024, + "epoch": 0.8502598016060463, + "grad_norm": 8.337666421628235, + "learning_rate": 4.924776641419513e-06, + "logits": -2.2924880981445312, + "logps": -175.90670776367188, + "loss": 0.2596, + "objective": 0.2636435329914093, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.7541666626930237, + "regularize": 0.2636435329914093, + "step": 150 + }, + { + "epoch": 0.8502598016060463, + "eval_dpo_loss": 0.6806454658508301, + "eval_logits": -2.4464104175567627, + "eval_logps": -188.63946533203125, + "eval_loss": 0.4017498791217804, + "eval_objective": 0.40516260266304016, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5423553586006165, + "eval_regularize": 0.40516260266304016, + "eval_runtime": 259.5657, + "eval_samples_per_second": 22.306, + "eval_steps_per_second": 0.932, + "step": 150 + }, + { + "dpo_loss": 0.5875340700149536, + "epoch": 0.8786017949929145, + "grad_norm": 8.964143655715564, + "learning_rate": 4.9122289584888926e-06, + "logits": -2.3187806606292725, + "logps": -185.90478515625, + "loss": 0.2651, + "objective": 0.26818570494651794, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.800000011920929, + "regularize": 0.26818570494651794, + "step": 155 + }, + { + "dpo_loss": 0.5894069671630859, + "epoch": 0.9069437883797827, + "grad_norm": 7.833211918555924, + "learning_rate": 4.8987324340362445e-06, + "logits": -2.2485156059265137, + "logps": -194.0889129638672, + "loss": 0.2472, + "objective": 0.24632495641708374, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.7541666626930237, + "regularize": 0.24632495641708374, + "step": 160 + }, + { + "dpo_loss": 0.5998678803443909, + "epoch": 0.9352857817666509, + "grad_norm": 6.957604774053052, + "learning_rate": 4.884292376870567e-06, + "logits": -2.367635488510132, + "logps": -169.78195190429688, + "loss": 0.2564, + "objective": 0.26594653725624084, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.7208333611488342, + "regularize": 0.26594653725624084, + "step": 165 + }, + { + "dpo_loss": 0.5835825204849243, + "epoch": 0.9636277751535192, + "grad_norm": 8.930600570817928, + "learning_rate": 4.868914466936038e-06, + "logits": -2.363553047180176, + "logps": -192.9253387451172, + "loss": 0.2434, + "objective": 0.22550734877586365, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.7708333134651184, + "regularize": 0.22550733387470245, + "step": 170 + }, + { + "dpo_loss": 0.5910046100616455, + "epoch": 0.9919697685403873, + "grad_norm": 7.732085709182539, + "learning_rate": 4.8526047530778175e-06, + "logits": -2.2910239696502686, + "logps": -191.64141845703125, + "loss": 0.2496, + "objective": 0.24141448736190796, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.6000000238418579, + "ranking_simple": 0.7875000238418579, + "regularize": 0.24141448736190796, + "step": 175 + }, + { + "dpo_loss": 0.5720356106758118, + "epoch": 1.0203117619272555, + "grad_norm": 7.878585873914542, + "learning_rate": 4.835369650662767e-06, + "logits": -2.5219788551330566, + "logps": -186.59535217285156, + "loss": 0.2212, + "objective": 0.21063460409641266, + "ranking_idealized": 0.9125000238418579, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.8125, + "regularize": 0.21063460409641266, + "step": 180 + }, + { + "dpo_loss": 0.5561386942863464, + "epoch": 1.0486537553141237, + "grad_norm": 8.429656140437402, + "learning_rate": 4.817215939055984e-06, + "logits": -2.44401478767395, + "logps": -204.15762329101562, + "loss": 0.2153, + "objective": 0.21741175651550293, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.8458333611488342, + "regularize": 0.21741175651550293, + "step": 185 + }, + { + "dpo_loss": 0.5703259706497192, + "epoch": 1.076995748700992, + "grad_norm": 6.743069420703677, + "learning_rate": 4.798150758954164e-06, + "logits": -2.489015817642212, + "logps": -198.45516967773438, + "loss": 0.2003, + "objective": 0.17540977895259857, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.8333333134651184, + "regularize": 0.17540977895259857, + "step": 190 + }, + { + "dpo_loss": 0.5622718930244446, + "epoch": 1.10533774208786, + "grad_norm": 7.079228513521207, + "learning_rate": 4.778181609576832e-06, + "logits": -2.429560661315918, + "logps": -182.86814880371094, + "loss": 0.1994, + "objective": 0.16888141632080078, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.875, + "regularize": 0.16888141632080078, + "step": 195 + }, + { + "dpo_loss": 0.5696191787719727, + "epoch": 1.1336797354747283, + "grad_norm": 6.830259665006017, + "learning_rate": 4.757316345716554e-06, + "logits": -2.513395071029663, + "logps": -192.72938537597656, + "loss": 0.1965, + "objective": 0.20290271937847137, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.8416666388511658, + "regularize": 0.20290271937847137, + "step": 200 + }, + { + "epoch": 1.1336797354747283, + "eval_dpo_loss": 0.6801125407218933, + "eval_logits": -2.597655773162842, + "eval_logps": -193.1246795654297, + "eval_loss": 0.40018174052238464, + "eval_objective": 0.40411826968193054, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.55888432264328, + "eval_regularize": 0.40411826968193054, + "eval_runtime": 259.3248, + "eval_samples_per_second": 22.327, + "eval_steps_per_second": 0.933, + "step": 200 + }, + { + "dpo_loss": 0.5719407200813293, + "epoch": 1.1620217288615966, + "grad_norm": 6.896008583963979, + "learning_rate": 4.735563174649278e-06, + "logits": -2.5157065391540527, + "logps": -198.75962829589844, + "loss": 0.205, + "objective": 0.2138771265745163, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.8166666626930237, + "regularize": 0.2138771265745163, + "step": 205 + }, + { + "dpo_loss": 0.5655397176742554, + "epoch": 1.1903637222484649, + "grad_norm": 6.830065558874749, + "learning_rate": 4.7129306529060415e-06, + "logits": -2.547936201095581, + "logps": -187.2952117919922, + "loss": 0.2025, + "objective": 0.22636540234088898, + "ranking_idealized": 0.9041666388511658, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.8166666626930237, + "regularize": 0.2263653725385666, + "step": 210 + }, + { + "dpo_loss": 0.5676775574684143, + "epoch": 1.2187057156353331, + "grad_norm": 6.8520964757916945, + "learning_rate": 4.68942768290728e-06, + "logits": -2.54328989982605, + "logps": -187.34585571289062, + "loss": 0.1863, + "objective": 0.19478672742843628, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.8416666388511658, + "regularize": 0.19478671252727509, + "step": 215 + }, + { + "dpo_loss": 0.5695532560348511, + "epoch": 1.2470477090222012, + "grad_norm": 6.476230536915824, + "learning_rate": 4.665063509461098e-06, + "logits": -2.455770969390869, + "logps": -187.62728881835938, + "loss": 0.1957, + "objective": 0.1894843727350235, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.6083333492279053, + "ranking_simple": 0.8583333492279053, + "regularize": 0.1894843727350235, + "step": 220 + }, + { + "dpo_loss": 0.5596610903739929, + "epoch": 1.2753897024090695, + "grad_norm": 6.702837995316673, + "learning_rate": 4.639847716126855e-06, + "logits": -2.4951536655426025, + "logps": -189.64401245117188, + "loss": 0.1959, + "objective": 0.1725076138973236, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.8583333492279053, + "regularize": 0.1725076138973236, + "step": 225 + }, + { + "dpo_loss": 0.5657731890678406, + "epoch": 1.3037316957959377, + "grad_norm": 7.276570061796103, + "learning_rate": 4.613790221445511e-06, + "logits": -2.5368714332580566, + "logps": -193.73602294921875, + "loss": 0.1875, + "objective": 0.18021216988563538, + "ranking_idealized": 0.9125000238418579, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.8041666746139526, + "regularize": 0.18021215498447418, + "step": 230 + }, + { + "dpo_loss": 0.5439472794532776, + "epoch": 1.3320736891828058, + "grad_norm": 7.643293399256914, + "learning_rate": 4.586901275038201e-06, + "logits": -2.8654701709747314, + "logps": -191.69354248046875, + "loss": 0.1942, + "objective": 0.2024109810590744, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.8416666388511658, + "regularize": 0.2024109810590744, + "step": 235 + }, + { + "dpo_loss": 0.5640192031860352, + "epoch": 1.360415682569674, + "grad_norm": 7.002186203050705, + "learning_rate": 4.559191453574582e-06, + "logits": -2.7892987728118896, + "logps": -191.39663696289062, + "loss": 0.1853, + "objective": 0.19372233748435974, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.7958333492279053, + "regularize": 0.19372233748435974, + "step": 240 + }, + { + "dpo_loss": 0.5699235796928406, + "epoch": 1.3887576759565423, + "grad_norm": 7.135454177759647, + "learning_rate": 4.530671656612544e-06, + "logits": -2.747896909713745, + "logps": -188.15423583984375, + "loss": 0.172, + "objective": 0.18116973340511322, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.8291666507720947, + "regularize": 0.18116970360279083, + "step": 245 + }, + { + "dpo_loss": 0.5546202063560486, + "epoch": 1.4170996693434104, + "grad_norm": 6.856611462056187, + "learning_rate": 4.501353102310901e-06, + "logits": -2.626624822616577, + "logps": -189.72596740722656, + "loss": 0.1784, + "objective": 0.19344764947891235, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.8541666865348816, + "regularize": 0.19344764947891235, + "step": 250 + }, + { + "epoch": 1.4170996693434104, + "eval_dpo_loss": 0.6801539659500122, + "eval_logits": -2.7528111934661865, + "eval_logps": -189.47007751464844, + "eval_loss": 0.39902833104133606, + "eval_objective": 0.40230515599250793, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5619834661483765, + "eval_regularize": 0.40230515599250793, + "eval_runtime": 258.6625, + "eval_samples_per_second": 22.384, + "eval_steps_per_second": 0.936, + "step": 250 + }, + { + "dpo_loss": 0.5707473754882812, + "epoch": 1.4454416627302786, + "grad_norm": 6.789096244940944, + "learning_rate": 4.4712473230167775e-06, + "logits": -2.524132490158081, + "logps": -189.31150817871094, + "loss": 0.1841, + "objective": 0.17286911606788635, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.8458333611488342, + "regularize": 0.17286911606788635, + "step": 255 + }, + { + "dpo_loss": 0.5669309496879578, + "epoch": 1.473783656117147, + "grad_norm": 7.262023839822884, + "learning_rate": 4.440366160729393e-06, + "logits": -2.642547130584717, + "logps": -196.33497619628906, + "loss": 0.1778, + "objective": 0.19296441972255707, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.8541666865348816, + "regularize": 0.19296441972255707, + "step": 260 + }, + { + "dpo_loss": 0.546380877494812, + "epoch": 1.5021256495040152, + "grad_norm": 6.836875807788374, + "learning_rate": 4.4087217624420595e-06, + "logits": -2.617671251296997, + "logps": -200.48138427734375, + "loss": 0.18, + "objective": 0.1762746423482895, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.8541666865348816, + "regularize": 0.1762746423482895, + "step": 265 + }, + { + "dpo_loss": 0.5641717910766602, + "epoch": 1.5304676428908834, + "grad_norm": 6.457668960267231, + "learning_rate": 4.376326575364206e-06, + "logits": -2.5867457389831543, + "logps": -194.27902221679688, + "loss": 0.1782, + "objective": 0.19895337522029877, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.8500000238418579, + "regularize": 0.19895337522029877, + "step": 270 + }, + { + "dpo_loss": 0.5631863474845886, + "epoch": 1.5588096362777515, + "grad_norm": 6.814477250627082, + "learning_rate": 4.34319334202531e-06, + "logits": -2.5872161388397217, + "logps": -194.5428924560547, + "loss": 0.1719, + "objective": 0.16667112708091736, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.824999988079071, + "regularize": 0.16667112708091736, + "step": 275 + }, + { + "dpo_loss": 0.5430881977081299, + "epoch": 1.5871516296646198, + "grad_norm": 7.393370645908027, + "learning_rate": 4.309335095262675e-06, + "logits": -2.4844515323638916, + "logps": -203.0095977783203, + "loss": 0.1821, + "objective": 0.18616026639938354, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.862500011920929, + "regularize": 0.18616026639938354, + "step": 280 + }, + { + "dpo_loss": 0.5480186939239502, + "epoch": 1.615493623051488, + "grad_norm": 6.140317838250891, + "learning_rate": 4.274765153095008e-06, + "logits": -2.618225574493408, + "logps": -192.17019653320312, + "loss": 0.1677, + "objective": 0.16235129535198212, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.8541666865348816, + "regularize": 0.16235129535198212, + "step": 285 + }, + { + "dpo_loss": 0.5588306188583374, + "epoch": 1.643835616438356, + "grad_norm": 6.415461406420722, + "learning_rate": 4.239497113483819e-06, + "logits": -2.691134214401245, + "logps": -191.76356506347656, + "loss": 0.1678, + "objective": 0.15764465928077698, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.8333333134651184, + "regularize": 0.15764465928077698, + "step": 290 + }, + { + "dpo_loss": 0.5371195673942566, + "epoch": 1.6721776098252243, + "grad_norm": 6.197778580003095, + "learning_rate": 4.203544848984729e-06, + "logits": -2.665118455886841, + "logps": -199.97247314453125, + "loss": 0.1669, + "objective": 0.15341004729270935, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.8958333134651184, + "regularize": 0.15341004729270935, + "step": 295 + }, + { + "dpo_loss": 0.5480075478553772, + "epoch": 1.7005196032120926, + "grad_norm": 6.138490802083033, + "learning_rate": 4.16692250129073e-06, + "logits": -2.7728724479675293, + "logps": -199.02529907226562, + "loss": 0.1717, + "objective": 0.20036275684833527, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.875, + "regularize": 0.20036275684833527, + "step": 300 + }, + { + "epoch": 1.7005196032120926, + "eval_dpo_loss": 0.6798518300056458, + "eval_logits": -2.877673625946045, + "eval_logps": -195.73040771484375, + "eval_loss": 0.40205851197242737, + "eval_objective": 0.40424150228500366, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5454545617103577, + "eval_regularize": 0.40424150228500366, + "eval_runtime": 259.7378, + "eval_samples_per_second": 22.292, + "eval_steps_per_second": 0.932, + "step": 300 + }, + { + "dpo_loss": 0.5607944130897522, + "epoch": 1.7288615965989607, + "grad_norm": 6.327660769011926, + "learning_rate": 4.129644475669617e-06, + "logits": -2.741549253463745, + "logps": -191.5762481689453, + "loss": 0.1638, + "objective": 0.17037154734134674, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.8333333134651184, + "regularize": 0.17037154734134674, + "step": 305 + }, + { + "dpo_loss": 0.5504526495933533, + "epoch": 1.7572035899858292, + "grad_norm": 6.7403750373805575, + "learning_rate": 4.091725435297721e-06, + "logits": -2.7614734172821045, + "logps": -190.3129425048828, + "loss": 0.1573, + "objective": 0.1668892502784729, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.8500000238418579, + "regularize": 0.1668892502784729, + "step": 310 + }, + { + "dpo_loss": 0.5544535517692566, + "epoch": 1.7855455833726972, + "grad_norm": 6.316947546186969, + "learning_rate": 4.053180295492203e-06, + "logits": -2.530224084854126, + "logps": -189.7359161376953, + "loss": 0.1665, + "objective": 0.15285438299179077, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.8500000238418579, + "regularize": 0.15285435318946838, + "step": 315 + }, + { + "dpo_loss": 0.5565517544746399, + "epoch": 1.8138875767595655, + "grad_norm": 6.951800105794237, + "learning_rate": 4.014024217844167e-06, + "logits": -2.596423864364624, + "logps": -201.33631896972656, + "loss": 0.1609, + "objective": 0.1613713800907135, + "ranking_idealized": 0.9041666388511658, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.8166666626930237, + "regularize": 0.1613713800907135, + "step": 320 + }, + { + "dpo_loss": 0.5427613854408264, + "epoch": 1.8422295701464337, + "grad_norm": 7.385375003834793, + "learning_rate": 3.974272604254906e-06, + "logits": -2.746447801589966, + "logps": -198.5028839111328, + "loss": 0.1637, + "objective": 0.15741844475269318, + "ranking_idealized": 0.9583333134651184, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.8833333253860474, + "regularize": 0.15741844475269318, + "step": 325 + }, + { + "dpo_loss": 0.5491302609443665, + "epoch": 1.8705715635333018, + "grad_norm": 7.03671844997743, + "learning_rate": 3.933941090877615e-06, + "logits": -2.5696513652801514, + "logps": -197.8240203857422, + "loss": 0.1572, + "objective": 0.15931017696857452, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.8374999761581421, + "regularize": 0.15931017696857452, + "step": 330 + }, + { + "dpo_loss": 0.5589691400527954, + "epoch": 1.89891355692017, + "grad_norm": 6.477813384915639, + "learning_rate": 3.893045541966975e-06, + "logits": -2.762031316757202, + "logps": -203.58236694335938, + "loss": 0.1535, + "objective": 0.15087805688381195, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.8708333373069763, + "regularize": 0.15087805688381195, + "step": 335 + }, + { + "dpo_loss": 0.5663090944290161, + "epoch": 1.9272555503070383, + "grad_norm": 6.018506322545624, + "learning_rate": 3.8516020436389945e-06, + "logits": -2.7401764392852783, + "logps": -201.431884765625, + "loss": 0.1465, + "objective": 0.14212678372859955, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.8458333611488342, + "regularize": 0.14212678372859955, + "step": 340 + }, + { + "dpo_loss": 0.5585800409317017, + "epoch": 1.9555975436939064, + "grad_norm": 6.624036944677984, + "learning_rate": 3.8096268975436045e-06, + "logits": -2.8644747734069824, + "logps": -205.10971069335938, + "loss": 0.1496, + "objective": 0.15010811388492584, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.8500000238418579, + "regularize": 0.15010811388492584, + "step": 345 + }, + { + "dpo_loss": 0.5542294383049011, + "epoch": 1.9839395370807746, + "grad_norm": 6.797447561538416, + "learning_rate": 3.767136614452458e-06, + "logits": -2.858165740966797, + "logps": -216.2846221923828, + "loss": 0.1527, + "objective": 0.1548275649547577, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.862500011920929, + "regularize": 0.1548275649547577, + "step": 350 + }, + { + "epoch": 1.9839395370807746, + "eval_dpo_loss": 0.6759648323059082, + "eval_logits": -3.1101362705230713, + "eval_logps": -211.60682678222656, + "eval_loss": 0.39596185088157654, + "eval_objective": 0.39695027470588684, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5557851195335388, + "eval_regularize": 0.39695027470588684, + "eval_runtime": 259.1655, + "eval_samples_per_second": 22.341, + "eval_steps_per_second": 0.934, + "step": 350 + }, + { + "dpo_loss": 0.5424126386642456, + "epoch": 2.012281530467643, + "grad_norm": 5.8063883271711685, + "learning_rate": 3.724147907764478e-06, + "logits": -2.7706944942474365, + "logps": -205.6593780517578, + "loss": 0.1484, + "objective": 0.1335248053073883, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.8958333134651184, + "regularize": 0.1335248053073883, + "step": 355 + }, + { + "dpo_loss": 0.5424516797065735, + "epoch": 2.040623523854511, + "grad_norm": 6.095590594476145, + "learning_rate": 3.6806776869317074e-06, + "logits": -2.8919057846069336, + "logps": -198.0377655029297, + "loss": 0.1296, + "objective": 0.13360460102558136, + "ranking_idealized": 0.9750000238418579, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.8958333134651184, + "regularize": 0.13360460102558136, + "step": 360 + }, + { + "dpo_loss": 0.5373047590255737, + "epoch": 2.0689655172413794, + "grad_norm": 6.231030884897393, + "learning_rate": 3.6367430508080283e-06, + "logits": -3.063735008239746, + "logps": -207.88970947265625, + "loss": 0.1326, + "objective": 0.12027280777692795, + "ranking_idealized": 0.9083333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.8458333611488342, + "regularize": 0.12027280777692795, + "step": 365 + }, + { + "dpo_loss": 0.5498053431510925, + "epoch": 2.0973075106282475, + "grad_norm": 6.774825067357434, + "learning_rate": 3.5923612809233987e-06, + "logits": -3.0324106216430664, + "logps": -194.41429138183594, + "loss": 0.1275, + "objective": 0.12781473994255066, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.8541666865348816, + "regularize": 0.12781472504138947, + "step": 370 + }, + { + "dpo_loss": 0.5362412333488464, + "epoch": 2.1256495040151155, + "grad_norm": 5.992693343450592, + "learning_rate": 3.547549834686222e-06, + "logits": -3.0772500038146973, + "logps": -205.6930694580078, + "loss": 0.1251, + "objective": 0.12155468761920929, + "ranking_idealized": 0.9750000238418579, + "ranking_idealized_expo": 0.6333333253860474, + "ranking_simple": 0.9125000238418579, + "regularize": 0.12155468761920929, + "step": 375 + }, + { + "dpo_loss": 0.5477665066719055, + "epoch": 2.153991497401984, + "grad_norm": 5.66117672582953, + "learning_rate": 3.5023263385165346e-06, + "logits": -2.971487522125244, + "logps": -195.91168212890625, + "loss": 0.1269, + "objective": 0.11776351928710938, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.8833333253860474, + "regularize": 0.11776351928710938, + "step": 380 + }, + { + "dpo_loss": 0.5528424382209778, + "epoch": 2.182333490788852, + "grad_norm": 5.673453112921881, + "learning_rate": 3.4567085809127247e-06, + "logits": -3.0804078578948975, + "logps": -187.17169189453125, + "loss": 0.1248, + "objective": 0.1074480265378952, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.9166666865348816, + "regularize": 0.1074480265378952, + "step": 385 + }, + { + "dpo_loss": 0.5393837094306946, + "epoch": 2.21067548417572, + "grad_norm": 6.024104465892304, + "learning_rate": 3.410714505454486e-06, + "logits": -2.998112916946411, + "logps": -206.6367645263672, + "loss": 0.1224, + "objective": 0.11136513203382492, + "ranking_idealized": 0.9083333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.8666666746139526, + "regularize": 0.11136512458324432, + "step": 390 + }, + { + "dpo_loss": 0.5289559364318848, + "epoch": 2.2390174775625886, + "grad_norm": 6.100671720050322, + "learning_rate": 3.364362203744777e-06, + "logits": -3.014930009841919, + "logps": -199.62350463867188, + "loss": 0.1358, + "objective": 0.1332855224609375, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.8999999761581421, + "regularize": 0.1332855224609375, + "step": 395 + }, + { + "dpo_loss": 0.5282385945320129, + "epoch": 2.2673594709494567, + "grad_norm": 5.884120825175806, + "learning_rate": 3.3176699082935546e-06, + "logits": -3.104701042175293, + "logps": -204.1650390625, + "loss": 0.1267, + "objective": 0.13824278116226196, + "ranking_idealized": 0.9666666388511658, + "ranking_idealized_expo": 0.6041666865348816, + "ranking_simple": 0.9083333611488342, + "regularize": 0.13824278116226196, + "step": 400 + }, + { + "epoch": 2.2673594709494567, + "eval_dpo_loss": 0.6775676608085632, + "eval_logits": -3.2515387535095215, + "eval_logps": -201.03680419921875, + "eval_loss": 0.39813509583473206, + "eval_objective": 0.39980101585388184, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5619834661483765, + "eval_regularize": 0.39980101585388184, + "eval_runtime": 259.6475, + "eval_samples_per_second": 22.299, + "eval_steps_per_second": 0.932, + "step": 400 + }, + { + "dpo_loss": 0.5318711400032043, + "epoch": 2.295701464336325, + "grad_norm": 5.891428026423688, + "learning_rate": 3.2706559853460818e-06, + "logits": -3.1382436752319336, + "logps": -204.19851684570312, + "loss": 0.1245, + "objective": 0.12317010760307312, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.8999999761581421, + "regularize": 0.12317009270191193, + "step": 405 + }, + { + "dpo_loss": 0.5330458879470825, + "epoch": 2.324043457723193, + "grad_norm": 5.883839309354464, + "learning_rate": 3.2233389276586325e-06, + "logits": -2.8399434089660645, + "logps": -203.78355407714844, + "loss": 0.1172, + "objective": 0.11361113935709, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.862500011920929, + "regularize": 0.1136111319065094, + "step": 410 + }, + { + "dpo_loss": 0.5357276797294617, + "epoch": 2.3523854511100613, + "grad_norm": 5.777709064523667, + "learning_rate": 3.1757373472244324e-06, + "logits": -2.7951467037200928, + "logps": -203.05201721191406, + "loss": 0.1182, + "objective": 0.12953059375286102, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.9041666388511658, + "regularize": 0.12953059375286102, + "step": 415 + }, + { + "dpo_loss": 0.5517702698707581, + "epoch": 2.3807274444969297, + "grad_norm": 5.823648414077854, + "learning_rate": 3.127869967952698e-06, + "logits": -2.653197765350342, + "logps": -199.99070739746094, + "loss": 0.124, + "objective": 0.12403346598148346, + "ranking_idealized": 0.9041666388511658, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.8333333134651184, + "regularize": 0.12403346598148346, + "step": 420 + }, + { + "dpo_loss": 0.5411447286605835, + "epoch": 2.409069437883798, + "grad_norm": 5.406034966497648, + "learning_rate": 3.0797556183036582e-06, + "logits": -2.7264721393585205, + "logps": -199.3270721435547, + "loss": 0.1206, + "objective": 0.11311660706996918, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.887499988079071, + "regularize": 0.11311660706996918, + "step": 425 + }, + { + "dpo_loss": 0.5396248698234558, + "epoch": 2.4374114312706663, + "grad_norm": 5.630938578254106, + "learning_rate": 3.0314132238824416e-06, + "logits": -2.8288919925689697, + "logps": -200.36387634277344, + "loss": 0.1145, + "objective": 0.10814479738473892, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.8916666507720947, + "regularize": 0.10814479738473892, + "step": 430 + }, + { + "dpo_loss": 0.537260890007019, + "epoch": 2.4657534246575343, + "grad_norm": 5.382852925810034, + "learning_rate": 2.9828617999947647e-06, + "logits": -2.9378559589385986, + "logps": -207.9824676513672, + "loss": 0.1177, + "objective": 0.1222720518708229, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.8833333253860474, + "regularize": 0.1222720518708229, + "step": 435 + }, + { + "dpo_loss": 0.5335346460342407, + "epoch": 2.4940954180444024, + "grad_norm": 5.4087072313434, + "learning_rate": 2.9341204441673267e-06, + "logits": -2.8663976192474365, + "logps": -201.47125244140625, + "loss": 0.1171, + "objective": 0.11767010390758514, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.887499988079071, + "regularize": 0.11767010390758514, + "step": 440 + }, + { + "dpo_loss": 0.5356226563453674, + "epoch": 2.5224374114312704, + "grad_norm": 5.709598259442252, + "learning_rate": 2.8852083286358647e-06, + "logits": -2.838826894760132, + "logps": -196.2780303955078, + "loss": 0.1126, + "objective": 0.10499007254838943, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.9041666388511658, + "regularize": 0.10499005764722824, + "step": 445 + }, + { + "dpo_loss": 0.5633688569068909, + "epoch": 2.550779404818139, + "grad_norm": 5.3793816763659255, + "learning_rate": 2.8361446928038298e-06, + "logits": -2.838387966156006, + "logps": -201.06651306152344, + "loss": 0.1121, + "objective": 0.11985477060079575, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.8458333611488342, + "regularize": 0.11985477060079575, + "step": 450 + }, + { + "epoch": 2.550779404818139, + "eval_dpo_loss": 0.6782127022743225, + "eval_logits": -2.9522619247436523, + "eval_logps": -192.7809295654297, + "eval_loss": 0.39569520950317383, + "eval_objective": 0.3975852131843567, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5619834661483765, + "eval_regularize": 0.3975852131843567, + "eval_runtime": 258.9142, + "eval_samples_per_second": 22.363, + "eval_steps_per_second": 0.935, + "step": 450 + }, + { + "dpo_loss": 0.5279621481895447, + "epoch": 2.579121398205007, + "grad_norm": 5.392681143501708, + "learning_rate": 2.7869488356746344e-06, + "logits": -2.902580499649048, + "logps": -197.00804138183594, + "loss": 0.1147, + "objective": 0.11110316216945648, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5208333134651184, + "ranking_simple": 0.9041666388511658, + "regularize": 0.11110316216945648, + "step": 455 + }, + { + "dpo_loss": 0.5277626514434814, + "epoch": 2.6074633915918755, + "grad_norm": 5.315354427263376, + "learning_rate": 2.7376401082604563e-06, + "logits": -3.0961711406707764, + "logps": -201.3343048095703, + "loss": 0.1143, + "objective": 0.11841437220573425, + "ranking_idealized": 0.9624999761581421, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.9291666746139526, + "regularize": 0.11841436475515366, + "step": 460 + }, + { + "dpo_loss": 0.5351486802101135, + "epoch": 2.6358053849787435, + "grad_norm": 5.411425844401666, + "learning_rate": 2.6882379059705953e-06, + "logits": -3.0071170330047607, + "logps": -197.46665954589844, + "loss": 0.1123, + "objective": 0.11872568726539612, + "ranking_idealized": 0.9166666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.8833333253860474, + "regularize": 0.11872567981481552, + "step": 465 + }, + { + "dpo_loss": 0.5352925658226013, + "epoch": 2.6641473783656116, + "grad_norm": 5.65268285367403, + "learning_rate": 2.6387616609823506e-06, + "logits": -3.0599467754364014, + "logps": -206.38381958007812, + "loss": 0.1029, + "objective": 0.103180892765522, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.9166666865348816, + "regularize": 0.103180892765522, + "step": 470 + }, + { + "dpo_loss": 0.5457909107208252, + "epoch": 2.69248937175248, + "grad_norm": 5.4108717659373395, + "learning_rate": 2.5892308345974517e-06, + "logits": -2.9345638751983643, + "logps": -191.0044403076172, + "loss": 0.1057, + "objective": 0.1046978086233139, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.887499988079071, + "regularize": 0.1046978086233139, + "step": 475 + }, + { + "dpo_loss": 0.5475970506668091, + "epoch": 2.720831365139348, + "grad_norm": 5.319894863881298, + "learning_rate": 2.53966490958702e-06, + "logits": -3.068021059036255, + "logps": -189.0258026123047, + "loss": 0.1101, + "objective": 0.10782204568386078, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.8916666507720947, + "regularize": 0.10782204568386078, + "step": 480 + }, + { + "dpo_loss": 0.537803590297699, + "epoch": 2.7491733585262166, + "grad_norm": 5.436329599180351, + "learning_rate": 2.490083382528097e-06, + "logits": -3.1013996601104736, + "logps": -200.41253662109375, + "loss": 0.1114, + "objective": 0.11053992807865143, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.887499988079071, + "regularize": 0.11053992807865143, + "step": 485 + }, + { + "dpo_loss": 0.5218387842178345, + "epoch": 2.7775153519130846, + "grad_norm": 5.475491663367257, + "learning_rate": 2.440505756134732e-06, + "logits": -3.1719369888305664, + "logps": -201.2511444091797, + "loss": 0.1091, + "objective": 0.1095014289021492, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.9041666388511658, + "regularize": 0.1095014289021492, + "step": 490 + }, + { + "dpo_loss": 0.5326829552650452, + "epoch": 2.8058573452999527, + "grad_norm": 5.099192097500262, + "learning_rate": 2.3909515315866606e-06, + "logits": -3.1574199199676514, + "logps": -198.6073760986328, + "loss": 0.1017, + "objective": 0.09904598444700241, + "ranking_idealized": 0.9166666865348816, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.887499988079071, + "regularize": 0.09904597699642181, + "step": 495 + }, + { + "dpo_loss": 0.5235782861709595, + "epoch": 2.8341993386868207, + "grad_norm": 5.2339920064616186, + "learning_rate": 2.341440200858589e-06, + "logits": -3.177107810974121, + "logps": -198.19998168945312, + "loss": 0.1063, + "objective": 0.10962475836277008, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.9125000238418579, + "regularize": 0.10962474346160889, + "step": 500 + }, + { + "epoch": 2.8341993386868207, + "eval_dpo_loss": 0.6759718656539917, + "eval_logits": -3.2835006713867188, + "eval_logps": -195.79200744628906, + "eval_loss": 0.39410659670829773, + "eval_objective": 0.3949359357357025, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.567148745059967, + "eval_regularize": 0.3949359357357025, + "eval_runtime": 259.0636, + "eval_samples_per_second": 22.35, + "eval_steps_per_second": 0.934, + "step": 500 + }, + { + "dpo_loss": 0.5304385423660278, + "epoch": 2.862541332073689, + "grad_norm": 5.373601466021835, + "learning_rate": 2.2919912390530945e-06, + "logits": -3.0923917293548584, + "logps": -199.0631866455078, + "loss": 0.102, + "objective": 0.10511735081672668, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.9083333611488342, + "regularize": 0.10511735081672668, + "step": 505 + }, + { + "dpo_loss": 0.5375287532806396, + "epoch": 2.8908833254605573, + "grad_norm": 6.218818504345056, + "learning_rate": 2.242624096740164e-06, + "logits": -3.0648419857025146, + "logps": -198.03871154785156, + "loss": 0.1009, + "objective": 0.09942923486232758, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.8916666507720947, + "regularize": 0.09942923486232758, + "step": 510 + }, + { + "dpo_loss": 0.5454570651054382, + "epoch": 2.9192253188474258, + "grad_norm": 5.360667892296426, + "learning_rate": 2.193358192306384e-06, + "logits": -3.177243709564209, + "logps": -192.1931610107422, + "loss": 0.1015, + "objective": 0.1067223846912384, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.887499988079071, + "regularize": 0.1067223846912384, + "step": 515 + }, + { + "dpo_loss": 0.5447422862052917, + "epoch": 2.947567312234294, + "grad_norm": 5.250252460609212, + "learning_rate": 2.1442129043167877e-06, + "logits": -2.988645076751709, + "logps": -196.80099487304688, + "loss": 0.0957, + "objective": 0.09414150565862656, + "ranking_idealized": 0.9083333611488342, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.8833333253860474, + "regularize": 0.09414150565862656, + "step": 520 + }, + { + "dpo_loss": 0.5430293679237366, + "epoch": 2.975909305621162, + "grad_norm": 5.566335571750069, + "learning_rate": 2.0952075638923656e-06, + "logits": -2.975144863128662, + "logps": -196.40476989746094, + "loss": 0.0946, + "objective": 0.10850825160741806, + "ranking_idealized": 0.8999999761581421, + "ranking_idealized_expo": 0.5833333134651184, + "ranking_simple": 0.8666666746139526, + "regularize": 0.10850825160741806, + "step": 525 + }, + { + "dpo_loss": 0.5192977786064148, + "epoch": 3.0042512990080303, + "grad_norm": 5.207616334659725, + "learning_rate": 2.046361447106244e-06, + "logits": -2.9917781352996826, + "logps": -199.7664031982422, + "loss": 0.099, + "objective": 0.09743621945381165, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.925000011920929, + "regularize": 0.09743621200323105, + "step": 530 + }, + { + "dpo_loss": 0.5329768061637878, + "epoch": 3.0325932923948984, + "grad_norm": 5.005538343407122, + "learning_rate": 1.997693767401503e-06, + "logits": -3.075000286102295, + "logps": -204.2377166748047, + "loss": 0.0882, + "objective": 0.08230598270893097, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.9083333611488342, + "regularize": 0.08230597525835037, + "step": 535 + }, + { + "dpo_loss": 0.538392961025238, + "epoch": 3.0609352857817664, + "grad_norm": 4.866997353798794, + "learning_rate": 1.9492236680336486e-06, + "logits": -3.1421122550964355, + "logps": -189.7461700439453, + "loss": 0.0842, + "objective": 0.07560276240110397, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5708333253860474, + "ranking_simple": 0.9083333611488342, + "regularize": 0.07560275495052338, + "step": 540 + }, + { + "dpo_loss": 0.5382309556007385, + "epoch": 3.089277279168635, + "grad_norm": 5.075388727380349, + "learning_rate": 1.9009702145406728e-06, + "logits": -3.102593421936035, + "logps": -204.09027099609375, + "loss": 0.0817, + "objective": 0.08249451220035553, + "ranking_idealized": 0.9041666388511658, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.8833333253860474, + "regularize": 0.08249450474977493, + "step": 545 + }, + { + "dpo_loss": 0.5309434533119202, + "epoch": 3.117619272555503, + "grad_norm": 4.919742025901051, + "learning_rate": 1.852952387243698e-06, + "logits": -2.887840747833252, + "logps": -203.031982421875, + "loss": 0.0891, + "objective": 0.0902470126748085, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.9375, + "regularize": 0.0902470126748085, + "step": 550 + }, + { + "epoch": 3.117619272555503, + "eval_dpo_loss": 0.6777102947235107, + "eval_logits": -3.195338249206543, + "eval_logps": -196.1659393310547, + "eval_loss": 0.39561545848846436, + "eval_objective": 0.39604148268699646, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5609503984451294, + "eval_regularize": 0.39604148268699646, + "eval_runtime": 259.2511, + "eval_samples_per_second": 22.334, + "eval_steps_per_second": 0.933, + "step": 550 + }, + { + "dpo_loss": 0.5161585211753845, + "epoch": 3.1459612659423715, + "grad_norm": 5.02329657218416, + "learning_rate": 1.8051890737811395e-06, + "logits": -3.032655954360962, + "logps": -205.76190185546875, + "loss": 0.0842, + "objective": 0.08457961678504944, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.949999988079071, + "regularize": 0.08457960933446884, + "step": 555 + }, + { + "dpo_loss": 0.5269332528114319, + "epoch": 3.1743032593292395, + "grad_norm": 5.022767770754425, + "learning_rate": 1.7576990616793139e-06, + "logits": -3.001573324203491, + "logps": -205.6256561279297, + "loss": 0.0777, + "objective": 0.07936005294322968, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.9041666388511658, + "regularize": 0.07936005294322968, + "step": 560 + }, + { + "dpo_loss": 0.5309363603591919, + "epoch": 3.2026452527161076, + "grad_norm": 5.117713576028772, + "learning_rate": 1.7105010309624381e-06, + "logits": -3.0092759132385254, + "logps": -198.69540405273438, + "loss": 0.0791, + "objective": 0.07759826630353928, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.8999999761581421, + "regularize": 0.07759825885295868, + "step": 565 + }, + { + "dpo_loss": 0.5266720652580261, + "epoch": 3.230987246102976, + "grad_norm": 4.836758469100523, + "learning_rate": 1.6636135468049122e-06, + "logits": -2.9470977783203125, + "logps": -202.33779907226562, + "loss": 0.0828, + "objective": 0.08297502994537354, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.4958333373069763, + "ranking_simple": 0.9041666388511658, + "regularize": 0.08297502249479294, + "step": 570 + }, + { + "dpo_loss": 0.5284319519996643, + "epoch": 3.259329239489844, + "grad_norm": 5.27684795086492, + "learning_rate": 1.617055052228768e-06, + "logits": -3.067121744155884, + "logps": -201.16802978515625, + "loss": 0.0794, + "objective": 0.08327650278806686, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.8999999761581421, + "regularize": 0.08327650278806686, + "step": 575 + }, + { + "dpo_loss": 0.5467706918716431, + "epoch": 3.287671232876712, + "grad_norm": 4.999181466831561, + "learning_rate": 1.5708438608491816e-06, + "logits": -3.0922207832336426, + "logps": -200.73341369628906, + "loss": 0.0809, + "objective": 0.08547008782625198, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.887499988079071, + "regularize": 0.08547007292509079, + "step": 580 + }, + { + "dpo_loss": 0.5353319644927979, + "epoch": 3.3160132262635806, + "grad_norm": 4.7454466484307485, + "learning_rate": 1.524998149670871e-06, + "logits": -3.148766040802002, + "logps": -199.28677368164062, + "loss": 0.0767, + "objective": 0.06919746100902557, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.887499988079071, + "regularize": 0.06919745355844498, + "step": 585 + }, + { + "dpo_loss": 0.5416805148124695, + "epoch": 3.3443552196504487, + "grad_norm": 4.994244494597657, + "learning_rate": 1.479535951938243e-06, + "logits": -3.191918134689331, + "logps": -201.75802612304688, + "loss": 0.0757, + "objective": 0.06989765167236328, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5291666388511658, + "ranking_simple": 0.8958333134651184, + "regularize": 0.06989765167236328, + "step": 590 + }, + { + "dpo_loss": 0.5297635197639465, + "epoch": 3.372697213037317, + "grad_norm": 5.147488031975634, + "learning_rate": 1.43447515004208e-06, + "logits": -3.0706212520599365, + "logps": -200.92311096191406, + "loss": 0.0775, + "objective": 0.0749397724866867, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.9166666865348816, + "regularize": 0.0749397724866867, + "step": 595 + }, + { + "dpo_loss": 0.511430561542511, + "epoch": 3.4010392064241852, + "grad_norm": 4.864631411349059, + "learning_rate": 1.3898334684855647e-06, + "logits": -3.051577091217041, + "logps": -202.49258422851562, + "loss": 0.0749, + "objective": 0.07237013429403305, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.9375, + "regularize": 0.07237013429403305, + "step": 600 + }, + { + "epoch": 3.4010392064241852, + "eval_dpo_loss": 0.6780565977096558, + "eval_logits": -3.1966371536254883, + "eval_logps": -194.1237335205078, + "eval_loss": 0.3962407112121582, + "eval_objective": 0.39730900526046753, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5743801593780518, + "eval_regularize": 0.39730900526046753, + "eval_runtime": 258.6121, + "eval_samples_per_second": 22.389, + "eval_steps_per_second": 0.936, + "step": 600 + }, + { + "dpo_loss": 0.5354328155517578, + "epoch": 3.4293811998110533, + "grad_norm": 5.220508290696569, + "learning_rate": 1.3456284669124159e-06, + "logits": -3.0896830558776855, + "logps": -204.61468505859375, + "loss": 0.0733, + "objective": 0.06684383749961853, + "ranking_idealized": 0.9166666865348816, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.9083333611488342, + "regularize": 0.06684383004903793, + "step": 605 + }, + { + "dpo_loss": 0.5366904139518738, + "epoch": 3.4577231931979218, + "grad_norm": 4.948012773738948, + "learning_rate": 1.301877533199859e-06, + "logits": -3.0734212398529053, + "logps": -203.69866943359375, + "loss": 0.0729, + "objective": 0.06990881264209747, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5375000238418579, + "ranking_simple": 0.8958333134651184, + "regularize": 0.06990881264209747, + "step": 610 + }, + { + "dpo_loss": 0.5279187560081482, + "epoch": 3.48606518658479, + "grad_norm": 5.16746605562179, + "learning_rate": 1.2585978766191726e-06, + "logits": -3.0537939071655273, + "logps": -202.3527374267578, + "loss": 0.0737, + "objective": 0.07423458993434906, + "ranking_idealized": 0.9083333611488342, + "ranking_idealized_expo": 0.4833333194255829, + "ranking_simple": 0.875, + "regularize": 0.07423458248376846, + "step": 615 + }, + { + "dpo_loss": 0.5187819004058838, + "epoch": 3.514407179971658, + "grad_norm": 4.801574670976317, + "learning_rate": 1.2158065210664848e-06, + "logits": -2.913203001022339, + "logps": -204.0981903076172, + "loss": 0.0707, + "objective": 0.06695393472909927, + "ranking_idealized": 0.8999999761581421, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.8999999761581421, + "regularize": 0.06695392727851868, + "step": 620 + }, + { + "dpo_loss": 0.5320748686790466, + "epoch": 3.5427491733585263, + "grad_norm": 4.901983573451279, + "learning_rate": 1.1735202983664803e-06, + "logits": -3.018667459487915, + "logps": -197.7013397216797, + "loss": 0.0682, + "objective": 0.06922433525323868, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.9333333373069763, + "regularize": 0.06922433525323868, + "step": 625 + }, + { + "dpo_loss": 0.5438559651374817, + "epoch": 3.5710911667453944, + "grad_norm": 4.916430683667445, + "learning_rate": 1.1317558416516696e-06, + "logits": -3.063880681991577, + "logps": -198.75747680664062, + "loss": 0.0704, + "objective": 0.07144972681999207, + "ranking_idealized": 0.9125000238418579, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.9125000238418579, + "regularize": 0.07144972681999207, + "step": 630 + }, + { + "dpo_loss": 0.5314496159553528, + "epoch": 3.5994331601322624, + "grad_norm": 4.767769596097874, + "learning_rate": 1.0905295788197993e-06, + "logits": -3.036067247390747, + "logps": -196.23695373535156, + "loss": 0.0647, + "objective": 0.05931680276989937, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.9333333373069763, + "regularize": 0.05931679159402847, + "step": 635 + }, + { + "dpo_loss": 0.5109093189239502, + "epoch": 3.627775153519131, + "grad_norm": 4.841523497854697, + "learning_rate": 1.049857726072005e-06, + "logits": -3.0281994342803955, + "logps": -211.7641143798828, + "loss": 0.0648, + "objective": 0.06795307993888855, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.9125000238418579, + "regularize": 0.06795307248830795, + "step": 640 + }, + { + "dpo_loss": 0.5220555067062378, + "epoch": 3.656117146905999, + "grad_norm": 4.756494572923249, + "learning_rate": 1.0097562815342215e-06, + "logits": -3.0403778553009033, + "logps": -197.11727905273438, + "loss": 0.0696, + "objective": 0.07045839726924896, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.8999999761581421, + "regularize": 0.07045838981866837, + "step": 645 + }, + { + "dpo_loss": 0.5448586344718933, + "epoch": 3.6844591402928675, + "grad_norm": 4.78589420445802, + "learning_rate": 9.702410189643838e-07, + "logits": -3.0378682613372803, + "logps": -199.30670166015625, + "loss": 0.062, + "objective": 0.06362789124250412, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.9208333492279053, + "regularize": 0.06362788379192352, + "step": 650 + }, + { + "epoch": 3.6844591402928675, + "eval_dpo_loss": 0.6778165698051453, + "eval_logits": -3.241185188293457, + "eval_logps": -195.3243865966797, + "eval_loss": 0.39558976888656616, + "eval_objective": 0.3966863453388214, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5702479481697083, + "eval_regularize": 0.3966863453388214, + "eval_runtime": 259.3281, + "eval_samples_per_second": 22.327, + "eval_steps_per_second": 0.933, + "step": 650 + }, + { + "dpo_loss": 0.5378040671348572, + "epoch": 3.7128011336797355, + "grad_norm": 4.991430717748734, + "learning_rate": 9.313274815478698e-07, + "logits": -3.06374454498291, + "logps": -206.94361877441406, + "loss": 0.0675, + "objective": 0.07482859492301941, + "ranking_idealized": 0.9166666865348816, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.8833333253860474, + "regularize": 0.07482858747243881, + "step": 655 + }, + { + "dpo_loss": 0.533437192440033, + "epoch": 3.7411431270666036, + "grad_norm": 4.914085991583788, + "learning_rate": 8.930309757836517e-07, + "logits": -3.1829257011413574, + "logps": -198.15655517578125, + "loss": 0.0645, + "objective": 0.0638025775551796, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.9458333253860474, + "regularize": 0.0638025775551796, + "step": 660 + }, + { + "dpo_loss": 0.5339053869247437, + "epoch": 3.769485120453472, + "grad_norm": 4.883348560718917, + "learning_rate": 8.553665654635343e-07, + "logits": -3.0380542278289795, + "logps": -192.95997619628906, + "loss": 0.0651, + "objective": 0.06292819231748581, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.9125000238418579, + "regularize": 0.06292817741632462, + "step": 665 + }, + { + "dpo_loss": 0.5281752943992615, + "epoch": 3.79782711384034, + "grad_norm": 5.904298344985475, + "learning_rate": 8.183490657468687e-07, + "logits": -3.175488233566284, + "logps": -201.79714965820312, + "loss": 0.0635, + "objective": 0.06745120882987976, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.612500011920929, + "ranking_simple": 0.9125000238418579, + "regularize": 0.06745120882987976, + "step": 670 + }, + { + "dpo_loss": 0.535234808921814, + "epoch": 3.826169107227208, + "grad_norm": 4.6497906673921685, + "learning_rate": 7.819930373330669e-07, + "logits": -3.079956531524658, + "logps": -195.46868896484375, + "loss": 0.0606, + "objective": 0.05796652287244797, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.9041666388511658, + "regularize": 0.05796651914715767, + "step": 675 + }, + { + "dpo_loss": 0.5492002964019775, + "epoch": 3.8545111006140766, + "grad_norm": 5.184144239252589, + "learning_rate": 7.463127807341966e-07, + "logits": -3.021759033203125, + "logps": -195.5998992919922, + "loss": 0.0607, + "objective": 0.0610785037279129, + "ranking_idealized": 0.9083333611488342, + "ranking_idealized_expo": 0.5249999761581421, + "ranking_simple": 0.862500011920929, + "regularize": 0.06107849255204201, + "step": 680 + }, + { + "dpo_loss": 0.5241864323616028, + "epoch": 3.8828530940009447, + "grad_norm": 5.005865678419639, + "learning_rate": 7.113223306499336e-07, + "logits": -3.1358683109283447, + "logps": -201.37371826171875, + "loss": 0.0629, + "objective": 0.06049242988228798, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.9125000238418579, + "regularize": 0.06049241125583649, + "step": 685 + }, + { + "dpo_loss": 0.5277370810508728, + "epoch": 3.9111950873878127, + "grad_norm": 4.853091570155952, + "learning_rate": 6.770354504470575e-07, + "logits": -3.0913193225860596, + "logps": -194.60806274414062, + "loss": 0.0576, + "objective": 0.05857709422707558, + "ranking_idealized": 0.9541666507720947, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.9333333373069763, + "regularize": 0.05857709422707558, + "step": 690 + }, + { + "dpo_loss": 0.5350156426429749, + "epoch": 3.9395370807746812, + "grad_norm": 4.643145060858906, + "learning_rate": 6.434656267456843e-07, + "logits": -3.007568836212158, + "logps": -196.4861297607422, + "loss": 0.062, + "objective": 0.06293628364801407, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.49166667461395264, + "ranking_simple": 0.9166666865348816, + "regularize": 0.06293627619743347, + "step": 695 + }, + { + "dpo_loss": 0.537192165851593, + "epoch": 3.9678790741615493, + "grad_norm": 4.752148657093376, + "learning_rate": 6.106260641143547e-07, + "logits": -3.088932991027832, + "logps": -200.46910095214844, + "loss": 0.0583, + "objective": 0.06041649729013443, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.5041666626930237, + "ranking_simple": 0.9375, + "regularize": 0.06041649356484413, + "step": 700 + }, + { + "epoch": 3.9678790741615493, + "eval_dpo_loss": 0.677208423614502, + "eval_logits": -3.2432026863098145, + "eval_logps": -196.44686889648438, + "eval_loss": 0.3955562114715576, + "eval_objective": 0.3960891366004944, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.5640496015548706, + "eval_regularize": 0.3960891366004944, + "eval_runtime": 258.7136, + "eval_samples_per_second": 22.38, + "eval_steps_per_second": 0.935, + "step": 700 + }, + { + "dpo_loss": 0.5246204733848572, + "epoch": 3.9962210675484178, + "grad_norm": 4.771032845540052, + "learning_rate": 5.785296798760601e-07, + "logits": -3.013643980026245, + "logps": -202.21218872070312, + "loss": 0.0575, + "objective": 0.04958561435341835, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.9208333492279053, + "regularize": 0.049585599452257156, + "step": 705 + }, + { + "dpo_loss": 0.5198561549186707, + "epoch": 4.024563060935286, + "grad_norm": 4.697507600765225, + "learning_rate": 5.471890990272666e-07, + "logits": -3.1067426204681396, + "logps": -207.82223510742188, + "loss": 0.0497, + "objective": 0.054764509201049805, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.9166666865348816, + "regularize": 0.05476450175046921, + "step": 710 + }, + { + "dpo_loss": 0.5302870869636536, + "epoch": 4.052905054322154, + "grad_norm": 4.631394448060559, + "learning_rate": 5.166166492719124e-07, + "logits": -3.0587379932403564, + "logps": -204.6709442138672, + "loss": 0.0462, + "objective": 0.044012073427438736, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.9125000238418579, + "regularize": 0.04401206597685814, + "step": 715 + }, + { + "dpo_loss": 0.5232208371162415, + "epoch": 4.081247047709022, + "grad_norm": 4.929724097820593, + "learning_rate": 4.868243561723535e-07, + "logits": -2.9354002475738525, + "logps": -205.88121032714844, + "loss": 0.0468, + "objective": 0.053409043699502945, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.925000011920929, + "regularize": 0.05340903252363205, + "step": 720 + }, + { + "dpo_loss": 0.5423314571380615, + "epoch": 4.109589041095891, + "grad_norm": 4.739841142104876, + "learning_rate": 4.57823938419153e-07, + "logits": -3.0645394325256348, + "logps": -200.61724853515625, + "loss": 0.0479, + "objective": 0.04531220719218254, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5541666746139526, + "ranking_simple": 0.9208333492279053, + "regularize": 0.045312199741601944, + "step": 725 + }, + { + "dpo_loss": 0.5259865522384644, + "epoch": 4.137931034482759, + "grad_norm": 4.6973617362423665, + "learning_rate": 4.2962680322157335e-07, + "logits": -3.1625542640686035, + "logps": -201.07965087890625, + "loss": 0.0486, + "objective": 0.049515120685100555, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.887499988079071, + "regularize": 0.04951511323451996, + "step": 730 + }, + { + "dpo_loss": 0.5347721576690674, + "epoch": 4.166273027869627, + "grad_norm": 4.718934829997983, + "learning_rate": 4.0224404182059443e-07, + "logits": -3.0613696575164795, + "logps": -204.30772399902344, + "loss": 0.0439, + "objective": 0.04426734894514084, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.949999988079071, + "regularize": 0.04426734521985054, + "step": 735 + }, + { + "dpo_loss": 0.5107501149177551, + "epoch": 4.194615021256495, + "grad_norm": 4.7379442831945635, + "learning_rate": 3.756864251262143e-07, + "logits": -3.04003643989563, + "logps": -202.8253631591797, + "loss": 0.0459, + "objective": 0.04314772039651871, + "ranking_idealized": 0.9583333134651184, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.949999988079071, + "regularize": 0.043147701770067215, + "step": 740 + }, + { + "dpo_loss": 0.5249863266944885, + "epoch": 4.222957014643363, + "grad_norm": 4.71988199553104, + "learning_rate": 3.499643994807486e-07, + "logits": -3.1296160221099854, + "logps": -198.77182006835938, + "loss": 0.046, + "objective": 0.045759402215480804, + "ranking_idealized": 0.9166666865348816, + "ranking_idealized_expo": 0.5083333253860474, + "ranking_simple": 0.8958333134651184, + "regularize": 0.04575938731431961, + "step": 745 + }, + { + "dpo_loss": 0.522843599319458, + "epoch": 4.251299008030231, + "grad_norm": 4.672227955095767, + "learning_rate": 3.250880825498026e-07, + "logits": -3.2786660194396973, + "logps": -199.6768035888672, + "loss": 0.0451, + "objective": 0.049515463411808014, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.9166666865348816, + "regularize": 0.04951544478535652, + "step": 750 + }, + { + "epoch": 4.251299008030231, + "eval_dpo_loss": 0.6771067380905151, + "eval_logits": -3.266589403152466, + "eval_logps": -195.43978881835938, + "eval_loss": 0.39519038796424866, + "eval_objective": 0.3954727351665497, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.567148745059967, + "eval_regularize": 0.3954727351665497, + "eval_runtime": 258.4936, + "eval_samples_per_second": 22.399, + "eval_steps_per_second": 0.936, + "step": 750 + }, + { + "dpo_loss": 0.528011679649353, + "epoch": 4.2796410014171, + "grad_norm": 5.004941031227222, + "learning_rate": 3.0106725934252095e-07, + "logits": -3.2007675170898438, + "logps": -196.980224609375, + "loss": 0.0469, + "objective": 0.052510153502225876, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.9166666865348816, + "regularize": 0.05251014232635498, + "step": 755 + }, + { + "dpo_loss": 0.5268819332122803, + "epoch": 4.307982994803968, + "grad_norm": 4.8743952598559135, + "learning_rate": 2.779113783626916e-07, + "logits": -3.166001796722412, + "logps": -206.85211181640625, + "loss": 0.0436, + "objective": 0.04319094866514206, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5874999761581421, + "ranking_simple": 0.9291666746139526, + "regularize": 0.04319094493985176, + "step": 760 + }, + { + "dpo_loss": 0.5119529962539673, + "epoch": 4.336324988190836, + "grad_norm": 4.7936404352453845, + "learning_rate": 2.5562954789221164e-07, + "logits": -3.224353790283203, + "logps": -204.93324279785156, + "loss": 0.0447, + "objective": 0.04502396285533905, + "ranking_idealized": 0.9624999761581421, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.9583333134651184, + "regularize": 0.04502394422888756, + "step": 765 + }, + { + "dpo_loss": 0.5143262147903442, + "epoch": 4.364666981577704, + "grad_norm": 4.845346036030975, + "learning_rate": 2.3423053240837518e-07, + "logits": -3.086646318435669, + "logps": -200.40354919433594, + "loss": 0.0447, + "objective": 0.04372342303395271, + "ranking_idealized": 0.9666666388511658, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.9708333611488342, + "regularize": 0.043723396956920624, + "step": 770 + }, + { + "dpo_loss": 0.5219811797142029, + "epoch": 4.393008974964572, + "grad_norm": 4.882318635829277, + "learning_rate": 2.137227491364016e-07, + "logits": -3.1227707862854004, + "logps": -202.45298767089844, + "loss": 0.0431, + "objective": 0.042134013026952744, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.9125000238418579, + "regularize": 0.04213400185108185, + "step": 775 + }, + { + "dpo_loss": 0.5324522852897644, + "epoch": 4.42135096835144, + "grad_norm": 5.178173799388018, + "learning_rate": 1.941142647385469e-07, + "logits": -3.1812171936035156, + "logps": -196.50355529785156, + "loss": 0.0404, + "objective": 0.039291638880968094, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.925000011920929, + "regularize": 0.0392916202545166, + "step": 780 + }, + { + "dpo_loss": 0.543372392654419, + "epoch": 4.449692961738309, + "grad_norm": 4.563712486054605, + "learning_rate": 1.7541279214111277e-07, + "logits": -3.204663038253784, + "logps": -198.68594360351562, + "loss": 0.0495, + "objective": 0.05586666613817215, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.9291666746139526, + "regularize": 0.05586665868759155, + "step": 785 + }, + { + "dpo_loss": 0.5179670453071594, + "epoch": 4.478034955125177, + "grad_norm": 4.962116136464137, + "learning_rate": 1.5762568750059604e-07, + "logits": -3.1283469200134277, + "logps": -203.68194580078125, + "loss": 0.0426, + "objective": 0.04570373520255089, + "ranking_idealized": 0.949999988079071, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.9541666507720947, + "regularize": 0.04570373147726059, + "step": 790 + }, + { + "dpo_loss": 0.525623083114624, + "epoch": 4.506376948512045, + "grad_norm": 4.808999753603184, + "learning_rate": 1.4075994731016895e-07, + "logits": -3.018510580062866, + "logps": -205.88327026367188, + "loss": 0.0435, + "objective": 0.036049842834472656, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5625, + "ranking_simple": 0.925000011920929, + "regularize": 0.03604983165860176, + "step": 795 + }, + { + "dpo_loss": 0.5233331918716431, + "epoch": 4.534718941898913, + "grad_norm": 4.734460423236949, + "learning_rate": 1.2482220564763669e-07, + "logits": -3.0628395080566406, + "logps": -201.49278259277344, + "loss": 0.0438, + "objective": 0.04488484933972359, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.925000011920929, + "regularize": 0.04488483443856239, + "step": 800 + }, + { + "epoch": 4.534718941898913, + "eval_dpo_loss": 0.6771342158317566, + "eval_logits": -3.2692906856536865, + "eval_logps": -195.2318878173828, + "eval_loss": 0.39515408873558044, + "eval_objective": 0.39557480812072754, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.56611567735672, + "eval_regularize": 0.39557480812072754, + "eval_runtime": 258.9479, + "eval_samples_per_second": 22.36, + "eval_steps_per_second": 0.935, + "step": 800 + }, + { + "dpo_loss": 0.5237378478050232, + "epoch": 4.563060935285781, + "grad_norm": 4.695279873765779, + "learning_rate": 1.0981873156594381e-07, + "logits": -3.0811893939971924, + "logps": -198.17877197265625, + "loss": 0.0405, + "objective": 0.04346688091754913, + "ranking_idealized": 0.8999999761581421, + "ranking_idealized_expo": 0.5, + "ranking_simple": 0.8958333134651184, + "regularize": 0.043466873466968536, + "step": 805 + }, + { + "dpo_loss": 0.5305168032646179, + "epoch": 4.59140292867265, + "grad_norm": 4.641921422533774, + "learning_rate": 9.575542662726756e-08, + "logits": -3.176128387451172, + "logps": -197.74447631835938, + "loss": 0.0422, + "objective": 0.036040760576725006, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.47083333134651184, + "ranking_simple": 0.9166666865348816, + "regularize": 0.03604074567556381, + "step": 810 + }, + { + "dpo_loss": 0.5344857573509216, + "epoch": 4.619744922059518, + "grad_norm": 4.571244888107197, + "learning_rate": 8.26378225816582e-08, + "logits": -3.02875018119812, + "logps": -193.68545532226562, + "loss": 0.0411, + "objective": 0.03461510315537453, + "ranking_idealized": 0.9416666626930237, + "ranking_idealized_expo": 0.6333333253860474, + "ranking_simple": 0.925000011920929, + "regularize": 0.03461508825421333, + "step": 815 + }, + { + "dpo_loss": 0.5378891825675964, + "epoch": 4.648086915446386, + "grad_norm": 4.696564500526644, + "learning_rate": 7.047107919114588e-08, + "logits": -3.148911952972412, + "logps": -204.34703063964844, + "loss": 0.0412, + "objective": 0.040915556252002716, + "ranking_idealized": 0.9333333373069763, + "ranking_idealized_expo": 0.574999988079071, + "ranking_simple": 0.925000011920929, + "regularize": 0.04091554507613182, + "step": 820 + }, + { + "dpo_loss": 0.5378555059432983, + "epoch": 4.6764289088332545, + "grad_norm": 4.6443623208845795, + "learning_rate": 5.92599822001666e-08, + "logits": -3.0313339233398438, + "logps": -200.1685028076172, + "loss": 0.0388, + "objective": 0.034796856343746185, + "ranking_idealized": 0.9125000238418579, + "ranking_idealized_expo": 0.5166666507720947, + "ranking_simple": 0.8999999761581421, + "regularize": 0.03479684516787529, + "step": 825 + }, + { + "dpo_loss": 0.5235874056816101, + "epoch": 4.7047709022201225, + "grad_norm": 4.667685116333195, + "learning_rate": 4.9008941453107527e-08, + "logits": -3.2229866981506348, + "logps": -199.17506408691406, + "loss": 0.043, + "objective": 0.04358634725213051, + "ranking_idealized": 0.925000011920929, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.9166666865348816, + "regularize": 0.043586332350969315, + "step": 830 + }, + { + "dpo_loss": 0.5147577524185181, + "epoch": 4.733112895606991, + "grad_norm": 4.858854755852941, + "learning_rate": 3.972198915970976e-08, + "logits": -3.1338717937469482, + "logps": -205.56285095214844, + "loss": 0.0391, + "objective": 0.04283083602786064, + "ranking_idealized": 0.9291666746139526, + "ranking_idealized_expo": 0.512499988079071, + "ranking_simple": 0.8999999761581421, + "regularize": 0.04283082112669945, + "step": 835 + }, + { + "dpo_loss": 0.5037484765052795, + "epoch": 4.7614548889938595, + "grad_norm": 4.877052385921156, + "learning_rate": 3.1402778309014284e-08, + "logits": -3.141592502593994, + "logps": -206.25045776367188, + "loss": 0.0442, + "objective": 0.04478234797716141, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.5583333373069763, + "ranking_simple": 0.9375, + "regularize": 0.04478234425187111, + "step": 840 + }, + { + "dpo_loss": 0.5356060266494751, + "epoch": 4.7897968823807275, + "grad_norm": 4.719985877621544, + "learning_rate": 2.4054581232470785e-08, + "logits": -3.155550241470337, + "logps": -196.71856689453125, + "loss": 0.0404, + "objective": 0.037482328712940216, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.48750001192092896, + "ranking_simple": 0.9166666865348816, + "regularize": 0.03748232498764992, + "step": 845 + }, + { + "dpo_loss": 0.5255146026611328, + "epoch": 4.818138875767596, + "grad_norm": 5.213474190658553, + "learning_rate": 1.768028831677926e-08, + "logits": -3.0956904888153076, + "logps": -199.34555053710938, + "loss": 0.0408, + "objective": 0.03515857085585594, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5458333492279053, + "ranking_simple": 0.9125000238418579, + "regularize": 0.03515855222940445, + "step": 850 + }, + { + "epoch": 4.818138875767596, + "eval_dpo_loss": 0.6770739555358887, + "eval_logits": -3.2704193592071533, + "eval_logps": -195.50953674316406, + "eval_loss": 0.3951335549354553, + "eval_objective": 0.395561158657074, + "eval_ranking_idealized": 0.9194214940071106, + "eval_ranking_idealized_expo": 0.5309917330741882, + "eval_ranking_simple": 0.56611567735672, + "eval_regularize": 0.395561158657074, + "eval_runtime": 259.1953, + "eval_samples_per_second": 22.338, + "eval_steps_per_second": 0.934, + "step": 850 + }, + { + "dpo_loss": 0.5365945100784302, + "epoch": 4.846480869154464, + "grad_norm": 4.6357618577755, + "learning_rate": 1.2282406866966078e-08, + "logits": -3.0836923122406006, + "logps": -202.77923583984375, + "loss": 0.0369, + "objective": 0.03312551975250244, + "ranking_idealized": 0.9583333134651184, + "ranking_idealized_expo": 0.5666666626930237, + "ranking_simple": 0.9375, + "regularize": 0.033125489950180054, + "step": 855 + }, + { + "dpo_loss": 0.5177373290061951, + "epoch": 4.874822862541333, + "grad_norm": 5.087309484062101, + "learning_rate": 7.863060120144316e-09, + "logits": -3.0513181686401367, + "logps": -197.80010986328125, + "loss": 0.0392, + "objective": 0.04462633281946182, + "ranking_idealized": 0.9125000238418579, + "ranking_idealized_expo": 0.5791666507720947, + "ranking_simple": 0.9208333492279053, + "regularize": 0.04462629556655884, + "step": 860 + }, + { + "dpo_loss": 0.5258888006210327, + "epoch": 4.903164855928201, + "grad_norm": 4.542109406021276, + "learning_rate": 4.423986410346526e-09, + "logits": -3.1244866847991943, + "logps": -196.3852081298828, + "loss": 0.0396, + "objective": 0.05519362911581993, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5333333611488342, + "ranking_simple": 0.9375, + "regularize": 0.055193621665239334, + "step": 865 + }, + { + "dpo_loss": 0.5206807851791382, + "epoch": 4.931506849315069, + "grad_norm": 4.732937484587474, + "learning_rate": 1.9665384847583622e-09, + "logits": -3.1762211322784424, + "logps": -202.62925720214844, + "loss": 0.0378, + "objective": 0.0369645431637764, + "ranking_idealized": 0.9208333492279053, + "ranking_idealized_expo": 0.5416666865348816, + "ranking_simple": 0.9166666865348816, + "regularize": 0.0369645357131958, + "step": 870 + }, + { + "dpo_loss": 0.5267921686172485, + "epoch": 4.959848842701937, + "grad_norm": 4.9576694103498475, + "learning_rate": 4.916829716183901e-10, + "logits": -3.104861259460449, + "logps": -203.89500427246094, + "loss": 0.041, + "objective": 0.03402137756347656, + "ranking_idealized": 0.9375, + "ranking_idealized_expo": 0.5916666388511658, + "ranking_simple": 0.9208333492279053, + "regularize": 0.03402136638760567, + "step": 875 + }, + { + "dpo_loss": 0.5229869484901428, + "epoch": 4.988190836088805, + "grad_norm": 4.696769247024194, + "learning_rate": 0.0, + "logits": -3.1990513801574707, + "logps": -196.47311401367188, + "loss": 0.0352, + "objective": 0.0322914645075798, + "ranking_idealized": 0.9458333253860474, + "ranking_idealized_expo": 0.550000011920929, + "ranking_simple": 0.9458333253860474, + "regularize": 0.03229145333170891, + "step": 880 + }, + { + "epoch": 4.988190836088805, + "step": 880, + "total_flos": 0.0, + "train_loss": 0.1496292933313684, + "train_runtime": 35141.4223, + "train_samples_per_second": 7.228, + "train_steps_per_second": 0.025 + } + ], + "logging_steps": 5, + "max_steps": 880, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}