{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.988190836088805, "eval_steps": 50, "global_step": 880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.005668398677373642, "grad_norm": 13.413595204669061, "learning_rate": 5.681818181818182e-08, "logits": -1.3147305250167847, "logps": -88.0877456665039, "loss": 0.4113, "objective": 0.41588976979255676, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.41588976979255676, "step": 1 }, { "dpo_loss": 0.6931002736091614, "epoch": 0.02834199338686821, "grad_norm": 13.427332141277578, "learning_rate": 2.840909090909091e-07, "logits": -1.3680692911148071, "logps": -84.26158905029297, "loss": 0.4129, "objective": 0.3754810094833374, "ranking_idealized": 0.6510416865348816, "ranking_idealized_expo": 0.5572916865348816, "ranking_simple": 0.546875, "regularize": 0.3754810094833374, "step": 5 }, { "dpo_loss": 0.6900920867919922, "epoch": 0.05668398677373642, "grad_norm": 12.67240246603534, "learning_rate": 5.681818181818182e-07, "logits": -1.4469478130340576, "logps": -82.44185638427734, "loss": 0.4149, "objective": 0.43780091404914856, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5166666507720947, "regularize": 0.43780091404914856, "step": 10 }, { "dpo_loss": 0.6891883015632629, "epoch": 0.08502598016060463, "grad_norm": 12.900443318480342, "learning_rate": 8.522727272727273e-07, "logits": -1.4273536205291748, "logps": -81.69231414794922, "loss": 0.419, "objective": 0.40471941232681274, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.512499988079071, "regularize": 0.40471941232681274, "step": 15 }, { "dpo_loss": 0.6918003559112549, "epoch": 0.11336797354747284, "grad_norm": 14.313913436693964, "learning_rate": 1.1363636363636364e-06, "logits": -1.437472939491272, "logps": -82.81884765625, "loss": 0.4037, "objective": 0.403365820646286, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5333333611488342, "regularize": 0.403365820646286, "step": 20 }, { "dpo_loss": 0.6768646836280823, "epoch": 0.14170996693434104, "grad_norm": 13.58368371280586, "learning_rate": 1.4204545454545458e-06, "logits": -1.5096564292907715, "logps": -82.65319061279297, "loss": 0.3931, "objective": 0.409546822309494, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5041666626930237, "regularize": 0.409546822309494, "step": 25 }, { "dpo_loss": 0.6711666584014893, "epoch": 0.17005196032120926, "grad_norm": 13.105526207742551, "learning_rate": 1.7045454545454546e-06, "logits": -1.5050361156463623, "logps": -83.46080780029297, "loss": 0.3841, "objective": 0.38422220945358276, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5666666626930237, "regularize": 0.38422220945358276, "step": 30 }, { "dpo_loss": 0.6604105234146118, "epoch": 0.19839395370807747, "grad_norm": 14.531223489181945, "learning_rate": 1.9886363636363638e-06, "logits": -1.5017287731170654, "logps": -84.02853393554688, "loss": 0.3722, "objective": 0.35262614488601685, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5333333611488342, "regularize": 0.35262614488601685, "step": 35 }, { "dpo_loss": 0.6500855684280396, "epoch": 0.22673594709494568, "grad_norm": 14.371511230761167, "learning_rate": 2.2727272727272728e-06, "logits": -1.5274395942687988, "logps": -84.69414520263672, "loss": 0.379, "objective": 0.3910427689552307, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5583333373069763, "regularize": 0.3910427689552307, "step": 40 }, { "dpo_loss": 0.639613151550293, "epoch": 0.25507794048181387, "grad_norm": 17.129866291690814, "learning_rate": 2.556818181818182e-06, "logits": -1.6906952857971191, "logps": -86.7696304321289, "loss": 0.3764, "objective": 0.4166857898235321, "ranking_idealized": 0.7208333611488342, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5458333492279053, "regularize": 0.4166857898235321, "step": 45 }, { "dpo_loss": 0.637481153011322, "epoch": 0.2834199338686821, "grad_norm": 12.595130895032831, "learning_rate": 2.8409090909090916e-06, "logits": -1.604835033416748, "logps": -87.7275161743164, "loss": 0.3566, "objective": 0.3444138169288635, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.5458333492279053, "regularize": 0.3444138169288635, "step": 50 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6897606253623962, "eval_logits": -1.6203083992004395, "eval_logps": -96.4831314086914, "eval_loss": 0.413291871547699, "eval_objective": 0.4237224757671356, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5165289044380188, "eval_regularize": 0.4237224757671356, "eval_runtime": 259.6612, "eval_samples_per_second": 22.298, "eval_steps_per_second": 0.932, "step": 50 }, { "dpo_loss": 0.6459915041923523, "epoch": 0.3117619272555503, "grad_norm": 12.272365561333357, "learning_rate": 3.125e-06, "logits": -1.6675435304641724, "logps": -91.76095581054688, "loss": 0.3508, "objective": 0.359805166721344, "ranking_idealized": 0.7124999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5625, "regularize": 0.359805166721344, "step": 55 }, { "dpo_loss": 0.6238431334495544, "epoch": 0.3401039206424185, "grad_norm": 13.401317383285344, "learning_rate": 3.409090909090909e-06, "logits": -1.6435742378234863, "logps": -88.34529113769531, "loss": 0.3357, "objective": 0.3332988917827606, "ranking_idealized": 0.7041666507720947, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5791666507720947, "regularize": 0.3332988917827606, "step": 60 }, { "dpo_loss": 0.6304014325141907, "epoch": 0.3684459140292867, "grad_norm": 12.713715258382189, "learning_rate": 3.6931818181818186e-06, "logits": -1.5630245208740234, "logps": -86.62226867675781, "loss": 0.3325, "objective": 0.3380378782749176, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5458333492279053, "regularize": 0.3380378782749176, "step": 65 }, { "dpo_loss": 0.627398669719696, "epoch": 0.39678790741615494, "grad_norm": 12.34517477378158, "learning_rate": 3.9772727272727275e-06, "logits": -1.519757628440857, "logps": -88.53465270996094, "loss": 0.3261, "objective": 0.3733421266078949, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5833333134651184, "regularize": 0.3733421266078949, "step": 70 }, { "dpo_loss": 0.618600070476532, "epoch": 0.42512990080302315, "grad_norm": 13.394680047264217, "learning_rate": 4.2613636363636365e-06, "logits": -1.444022297859192, "logps": -86.64772033691406, "loss": 0.3211, "objective": 0.3317987322807312, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.612500011920929, "regularize": 0.3317987322807312, "step": 75 }, { "dpo_loss": 0.6121571660041809, "epoch": 0.45347189418989137, "grad_norm": 12.637476273331325, "learning_rate": 4.5454545454545455e-06, "logits": -1.5524269342422485, "logps": -82.45478057861328, "loss": 0.3164, "objective": 0.33297327160835266, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5833333134651184, "regularize": 0.33297327160835266, "step": 80 }, { "dpo_loss": 0.5934053659439087, "epoch": 0.4818138875767596, "grad_norm": 11.681739450816789, "learning_rate": 4.829545454545455e-06, "logits": -1.5023764371871948, "logps": -83.81694030761719, "loss": 0.3085, "objective": 0.2888755202293396, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6166666746139526, "regularize": 0.2888755202293396, "step": 85 }, { "dpo_loss": 0.6050879955291748, "epoch": 0.5101558809636277, "grad_norm": 11.586500005637587, "learning_rate": 4.999921328558333e-06, "logits": -1.246580958366394, "logps": -88.33258819580078, "loss": 0.3085, "objective": 0.3082594871520996, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5458333492279053, "regularize": 0.3082594871520996, "step": 90 }, { "dpo_loss": 0.5785138010978699, "epoch": 0.538497874350496, "grad_norm": 11.1585869550919, "learning_rate": 4.999036331701828e-06, "logits": -1.2436394691467285, "logps": -85.56443786621094, "loss": 0.3059, "objective": 0.293775349855423, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6208333373069763, "regularize": 0.293775349855423, "step": 95 }, { "dpo_loss": 0.5692493319511414, "epoch": 0.5668398677373642, "grad_norm": 10.57766394132754, "learning_rate": 4.997168347957521e-06, "logits": -1.3053488731384277, "logps": -83.31777954101562, "loss": 0.3027, "objective": 0.31831109523773193, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6083333492279053, "regularize": 0.31831109523773193, "step": 100 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.686188817024231, "eval_logits": -1.3063371181488037, "eval_logps": -88.41004180908203, "eval_loss": 0.41417956352233887, "eval_objective": 0.41506800055503845, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5216942429542542, "eval_regularize": 0.41506800055503845, "eval_runtime": 259.1694, "eval_samples_per_second": 22.341, "eval_steps_per_second": 0.934, "step": 100 }, { "dpo_loss": 0.5781983733177185, "epoch": 0.5951818611242324, "grad_norm": 10.454612496016592, "learning_rate": 4.994318112090048e-06, "logits": -1.1954314708709717, "logps": -83.58454132080078, "loss": 0.3006, "objective": 0.31248193979263306, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.637499988079071, "regularize": 0.31248193979263306, "step": 105 }, { "dpo_loss": 0.5739426016807556, "epoch": 0.6235238545111006, "grad_norm": 10.705962678396897, "learning_rate": 4.990486745229364e-06, "logits": -1.4220352172851562, "logps": -82.55160522460938, "loss": 0.3033, "objective": 0.31312334537506104, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.6000000238418579, "regularize": 0.31312334537506104, "step": 110 }, { "dpo_loss": 0.5884331464767456, "epoch": 0.6518658478979689, "grad_norm": 10.243480321598604, "learning_rate": 4.985675754429744e-06, "logits": -1.5664387941360474, "logps": -80.2437515258789, "loss": 0.2914, "objective": 0.29273518919944763, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5708333253860474, "regularize": 0.29273518919944763, "step": 115 }, { "dpo_loss": 0.5871846079826355, "epoch": 0.680207841284837, "grad_norm": 9.272918607237454, "learning_rate": 4.9798870320769884e-06, "logits": -1.5533816814422607, "logps": -77.25326538085938, "loss": 0.2962, "objective": 0.27850577235221863, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6166666746139526, "regularize": 0.27850577235221863, "step": 120 }, { "dpo_loss": 0.5849189162254333, "epoch": 0.7085498346717053, "grad_norm": 9.087139481090837, "learning_rate": 4.973122855144066e-06, "logits": -1.4378304481506348, "logps": -76.45697784423828, "loss": 0.286, "objective": 0.27717408537864685, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6541666388511658, "regularize": 0.27717408537864685, "step": 125 }, { "dpo_loss": 0.5918333530426025, "epoch": 0.7368918280585735, "grad_norm": 9.798401176649499, "learning_rate": 4.965385884295467e-06, "logits": -1.5077089071273804, "logps": -75.78624725341797, "loss": 0.2914, "objective": 0.29082292318344116, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.5625, "regularize": 0.29082292318344116, "step": 130 }, { "dpo_loss": 0.5844586491584778, "epoch": 0.7652338214454416, "grad_norm": 9.388728292193363, "learning_rate": 4.956679162840646e-06, "logits": -1.4935728311538696, "logps": -77.08039093017578, "loss": 0.2751, "objective": 0.27092453837394714, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5708333253860474, "regularize": 0.27092453837394714, "step": 135 }, { "dpo_loss": 0.5776726007461548, "epoch": 0.7935758148323099, "grad_norm": 9.368061718815978, "learning_rate": 4.947006115536947e-06, "logits": -1.344637393951416, "logps": -79.50406646728516, "loss": 0.278, "objective": 0.272522896528244, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5708333253860474, "regularize": 0.272522896528244, "step": 140 }, { "dpo_loss": 0.583095908164978, "epoch": 0.821917808219178, "grad_norm": 8.887316448589162, "learning_rate": 4.9363705472424825e-06, "logits": -1.260974645614624, "logps": -80.34443664550781, "loss": 0.2702, "objective": 0.27244552969932556, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5791666507720947, "regularize": 0.27244552969932556, "step": 145 }, { "dpo_loss": 0.5877144932746887, "epoch": 0.8502598016060463, "grad_norm": 8.678793910307522, "learning_rate": 4.924776641419513e-06, "logits": -1.168263554573059, "logps": -79.24138641357422, "loss": 0.2706, "objective": 0.26840564608573914, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6625000238418579, "regularize": 0.26840564608573914, "step": 150 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6856931447982788, "eval_logits": -1.1981431245803833, "eval_logps": -87.367431640625, "eval_loss": 0.42615318298339844, "eval_objective": 0.42774394154548645, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5278925895690918, "eval_regularize": 0.42774394154548645, "eval_runtime": 258.5616, "eval_samples_per_second": 22.393, "eval_steps_per_second": 0.936, "step": 150 }, { "dpo_loss": 0.5757229924201965, "epoch": 0.8786017949929145, "grad_norm": 9.10049250548801, "learning_rate": 4.9122289584888926e-06, "logits": -1.1876070499420166, "logps": -79.34397888183594, "loss": 0.2634, "objective": 0.2684144973754883, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6416666507720947, "regularize": 0.2684144973754883, "step": 155 }, { "dpo_loss": 0.5666177868843079, "epoch": 0.9069437883797827, "grad_norm": 8.820116531265606, "learning_rate": 4.8987324340362445e-06, "logits": -1.129431962966919, "logps": -79.80657196044922, "loss": 0.2548, "objective": 0.24889370799064636, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6208333373069763, "regularize": 0.24889370799064636, "step": 160 }, { "dpo_loss": 0.5674318671226501, "epoch": 0.9352857817666509, "grad_norm": 8.768643401715794, "learning_rate": 4.884292376870567e-06, "logits": -1.08602774143219, "logps": -80.07511138916016, "loss": 0.2569, "objective": 0.24699881672859192, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6416666507720947, "regularize": 0.24699881672859192, "step": 165 }, { "dpo_loss": 0.5818125605583191, "epoch": 0.9636277751535192, "grad_norm": 8.727560270319945, "learning_rate": 4.868914466936038e-06, "logits": -1.1042813062667847, "logps": -81.02680206298828, "loss": 0.2644, "objective": 0.2629285454750061, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6166666746139526, "regularize": 0.2629285454750061, "step": 170 }, { "dpo_loss": 0.5690818428993225, "epoch": 0.9919697685403873, "grad_norm": 9.360612159540104, "learning_rate": 4.8526047530778175e-06, "logits": -1.0876260995864868, "logps": -81.20608520507812, "loss": 0.257, "objective": 0.2705513834953308, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.675000011920929, "regularize": 0.2705513834953308, "step": 175 }, { "dpo_loss": 0.5392836332321167, "epoch": 1.0203117619272555, "grad_norm": 8.811963294729349, "learning_rate": 4.835369650662767e-06, "logits": -1.2233085632324219, "logps": -79.34477233886719, "loss": 0.2467, "objective": 0.25315728783607483, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6499999761581421, "regularize": 0.25315728783607483, "step": 180 }, { "dpo_loss": 0.5429127812385559, "epoch": 1.0486537553141237, "grad_norm": 7.990993827783984, "learning_rate": 4.817215939055984e-06, "logits": -1.0636595487594604, "logps": -77.5766830444336, "loss": 0.2395, "objective": 0.23463593423366547, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6083333492279053, "regularize": 0.23463593423366547, "step": 185 }, { "dpo_loss": 0.5637180209159851, "epoch": 1.076995748700992, "grad_norm": 8.256999336661497, "learning_rate": 4.798150758954164e-06, "logits": -1.0749539136886597, "logps": -78.55563354492188, "loss": 0.2191, "objective": 0.21444876492023468, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6541666388511658, "regularize": 0.2144487500190735, "step": 190 }, { "dpo_loss": 0.5680408477783203, "epoch": 1.10533774208786, "grad_norm": 8.413792498503495, "learning_rate": 4.778181609576832e-06, "logits": -1.1007658243179321, "logps": -77.28519439697266, "loss": 0.2178, "objective": 0.21195411682128906, "ranking_idealized": 0.7041666507720947, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6666666865348816, "regularize": 0.21195411682128906, "step": 195 }, { "dpo_loss": 0.5640944838523865, "epoch": 1.1336797354747283, "grad_norm": 7.655804437668192, "learning_rate": 4.757316345716554e-06, "logits": -1.2176551818847656, "logps": -76.59423828125, "loss": 0.2256, "objective": 0.2188282459974289, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.2188282459974289, "step": 200 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6861998438835144, "eval_logits": -1.2023168802261353, "eval_logps": -81.81192016601562, "eval_loss": 0.43468645215034485, "eval_objective": 0.434445858001709, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5247933864593506, "eval_regularize": 0.434445858001709, "eval_runtime": 259.1196, "eval_samples_per_second": 22.345, "eval_steps_per_second": 0.934, "step": 200 }, { "dpo_loss": 0.5600470304489136, "epoch": 1.1620217288615966, "grad_norm": 7.695005611618307, "learning_rate": 4.735563174649278e-06, "logits": -1.1184250116348267, "logps": -77.62281799316406, "loss": 0.2243, "objective": 0.22593899071216583, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6000000238418579, "regularize": 0.22593899071216583, "step": 205 }, { "dpo_loss": 0.5418952107429504, "epoch": 1.1903637222484649, "grad_norm": 7.898188189840939, "learning_rate": 4.7129306529060415e-06, "logits": -1.0059646368026733, "logps": -78.90467834472656, "loss": 0.2215, "objective": 0.21866732835769653, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5, "ranking_simple": 0.612500011920929, "regularize": 0.21866732835769653, "step": 210 }, { "dpo_loss": 0.5478299856185913, "epoch": 1.2187057156353331, "grad_norm": 8.602045809831973, "learning_rate": 4.68942768290728e-06, "logits": -0.9527910947799683, "logps": -78.7351303100586, "loss": 0.2113, "objective": 0.21119125187397003, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.625, "regularize": 0.21119123697280884, "step": 215 }, { "dpo_loss": 0.5662825107574463, "epoch": 1.2470477090222012, "grad_norm": 8.077787041174624, "learning_rate": 4.665063509461098e-06, "logits": -0.8720958828926086, "logps": -76.756591796875, "loss": 0.2138, "objective": 0.20959888398647308, "ranking_idealized": 0.7208333611488342, "ranking_idealized_expo": 0.6041666865348816, "ranking_simple": 0.6958333253860474, "regularize": 0.20959888398647308, "step": 220 }, { "dpo_loss": 0.5461317300796509, "epoch": 1.2753897024090695, "grad_norm": 7.874404608360002, "learning_rate": 4.639847716126855e-06, "logits": -0.9673039317131042, "logps": -78.00302124023438, "loss": 0.2154, "objective": 0.20724421739578247, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6416666507720947, "regularize": 0.20724421739578247, "step": 225 }, { "dpo_loss": 0.5674420595169067, "epoch": 1.3037316957959377, "grad_norm": 7.50398022531709, "learning_rate": 4.613790221445511e-06, "logits": -0.9144200682640076, "logps": -78.14189910888672, "loss": 0.2025, "objective": 0.21042712032794952, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.637499988079071, "regularize": 0.21042712032794952, "step": 230 }, { "dpo_loss": 0.5355426669120789, "epoch": 1.3320736891828058, "grad_norm": 7.978556347314524, "learning_rate": 4.586901275038201e-06, "logits": -1.0436056852340698, "logps": -76.17823791503906, "loss": 0.2072, "objective": 0.19967219233512878, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6291666626930237, "regularize": 0.19967219233512878, "step": 235 }, { "dpo_loss": 0.5373556613922119, "epoch": 1.360415682569674, "grad_norm": 7.6373820167513635, "learning_rate": 4.559191453574582e-06, "logits": -0.9692198038101196, "logps": -78.42900085449219, "loss": 0.1972, "objective": 0.19282637536525726, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.6458333134651184, "regularize": 0.19282637536525726, "step": 240 }, { "dpo_loss": 0.5545187592506409, "epoch": 1.3887576759565423, "grad_norm": 7.35404104460057, "learning_rate": 4.530671656612544e-06, "logits": -0.992374062538147, "logps": -76.8333969116211, "loss": 0.1965, "objective": 0.19396263360977173, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.625, "regularize": 0.19396263360977173, "step": 245 }, { "dpo_loss": 0.536364734172821, "epoch": 1.4170996693434104, "grad_norm": 7.62812382511743, "learning_rate": 4.501353102310901e-06, "logits": -0.9773390293121338, "logps": -76.92967224121094, "loss": 0.2005, "objective": 0.2195345014333725, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.625, "regularize": 0.2195345014333725, "step": 250 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6815471053123474, "eval_logits": -1.0615853071212769, "eval_logps": -81.82124328613281, "eval_loss": 0.42915773391723633, "eval_objective": 0.4288579821586609, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5227272510528564, "eval_regularize": 0.4288579821586609, "eval_runtime": 259.2538, "eval_samples_per_second": 22.333, "eval_steps_per_second": 0.933, "step": 250 }, { "dpo_loss": 0.5536395907402039, "epoch": 1.4454416627302786, "grad_norm": 7.608994157416838, "learning_rate": 4.4712473230167775e-06, "logits": -0.9322084784507751, "logps": -77.44235229492188, "loss": 0.1968, "objective": 0.1830952763557434, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6499999761581421, "regularize": 0.1830952763557434, "step": 255 }, { "dpo_loss": 0.5560018420219421, "epoch": 1.473783656117147, "grad_norm": 7.9576485937673995, "learning_rate": 4.440366160729393e-06, "logits": -0.9907886385917664, "logps": -77.4918212890625, "loss": 0.197, "objective": 0.20965854823589325, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6333333253860474, "regularize": 0.20965854823589325, "step": 260 }, { "dpo_loss": 0.5228397250175476, "epoch": 1.5021256495040152, "grad_norm": 7.710193409626713, "learning_rate": 4.4087217624420595e-06, "logits": -0.9860392808914185, "logps": -76.1131362915039, "loss": 0.1993, "objective": 0.18088673055171967, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6333333253860474, "regularize": 0.18088671565055847, "step": 265 }, { "dpo_loss": 0.5495325922966003, "epoch": 1.5304676428908834, "grad_norm": 7.266856200970056, "learning_rate": 4.376326575364206e-06, "logits": -0.9504061341285706, "logps": -77.44710540771484, "loss": 0.1911, "objective": 0.1995639055967331, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.637499988079071, "regularize": 0.1995639055967331, "step": 270 }, { "dpo_loss": 0.5499274730682373, "epoch": 1.5588096362777515, "grad_norm": 7.524550189383748, "learning_rate": 4.34319334202531e-06, "logits": -0.959584653377533, "logps": -77.12781524658203, "loss": 0.1924, "objective": 0.20593222975730896, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6833333373069763, "regularize": 0.20593222975730896, "step": 275 }, { "dpo_loss": 0.5305771827697754, "epoch": 1.5871516296646198, "grad_norm": 7.773566185560746, "learning_rate": 4.309335095262675e-06, "logits": -0.855711042881012, "logps": -74.90408325195312, "loss": 0.1869, "objective": 0.18780963122844696, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6416666507720947, "regularize": 0.18780963122844696, "step": 280 }, { "dpo_loss": 0.5325539112091064, "epoch": 1.615493623051488, "grad_norm": 7.526664818437735, "learning_rate": 4.274765153095008e-06, "logits": -0.8781672120094299, "logps": -76.42524719238281, "loss": 0.1948, "objective": 0.18717069923877716, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6583333611488342, "regularize": 0.18717069923877716, "step": 285 }, { "dpo_loss": 0.5575224757194519, "epoch": 1.643835616438356, "grad_norm": 6.835237556478757, "learning_rate": 4.239497113483819e-06, "logits": -0.8322954773902893, "logps": -74.6546859741211, "loss": 0.1789, "objective": 0.182403564453125, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6625000238418579, "regularize": 0.182403564453125, "step": 290 }, { "dpo_loss": 0.5432038903236389, "epoch": 1.6721776098252243, "grad_norm": 6.961571934316881, "learning_rate": 4.203544848984729e-06, "logits": -0.8109145760536194, "logps": -72.75186920166016, "loss": 0.1859, "objective": 0.18214626610279083, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6416666507720947, "regularize": 0.18214626610279083, "step": 295 }, { "dpo_loss": 0.5352488160133362, "epoch": 1.7005196032120926, "grad_norm": 7.403474665890531, "learning_rate": 4.16692250129073e-06, "logits": -0.9637666344642639, "logps": -74.78925323486328, "loss": 0.187, "objective": 0.20280833542346954, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6958333253860474, "regularize": 0.20280833542346954, "step": 300 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6845287680625916, "eval_logits": -1.0398027896881104, "eval_logps": -80.0077133178711, "eval_loss": 0.43694496154785156, "eval_objective": 0.43617644906044006, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5258264541625977, "eval_regularize": 0.43617644906044006, "eval_runtime": 259.8028, "eval_samples_per_second": 22.286, "eval_steps_per_second": 0.931, "step": 300 }, { "dpo_loss": 0.5487022399902344, "epoch": 1.7288615965989607, "grad_norm": 7.543811016492746, "learning_rate": 4.129644475669617e-06, "logits": -0.9759048223495483, "logps": -74.88541412353516, "loss": 0.1848, "objective": 0.18824300169944763, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.625, "regularize": 0.18824300169944763, "step": 305 }, { "dpo_loss": 0.5396175980567932, "epoch": 1.7572035899858292, "grad_norm": 7.45667126547559, "learning_rate": 4.091725435297721e-06, "logits": -1.014137625694275, "logps": -71.69686126708984, "loss": 0.187, "objective": 0.18091975152492523, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6499999761581421, "regularize": 0.18091975152492523, "step": 310 }, { "dpo_loss": 0.5542029142379761, "epoch": 1.7855455833726972, "grad_norm": 7.022791945824948, "learning_rate": 4.053180295492203e-06, "logits": -0.9052151441574097, "logps": -72.4874038696289, "loss": 0.1773, "objective": 0.17407093942165375, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.612500011920929, "regularize": 0.17407093942165375, "step": 315 }, { "dpo_loss": 0.5506651401519775, "epoch": 1.8138875767595655, "grad_norm": 7.473980140791967, "learning_rate": 4.014024217844167e-06, "logits": -0.9277843832969666, "logps": -75.52190399169922, "loss": 0.1782, "objective": 0.19924014806747437, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.637499988079071, "regularize": 0.19924013316631317, "step": 320 }, { "dpo_loss": 0.5471735000610352, "epoch": 1.8422295701464337, "grad_norm": 7.500911105092317, "learning_rate": 3.974272604254906e-06, "logits": -0.9084165096282959, "logps": -77.74280548095703, "loss": 0.1791, "objective": 0.18182264268398285, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6875, "regularize": 0.18182264268398285, "step": 325 }, { "dpo_loss": 0.5376237034797668, "epoch": 1.8705715635333018, "grad_norm": 6.956332723343249, "learning_rate": 3.933941090877615e-06, "logits": -0.6826075911521912, "logps": -74.37185668945312, "loss": 0.1748, "objective": 0.15574544668197632, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.6499999761581421, "regularize": 0.15574544668197632, "step": 330 }, { "dpo_loss": 0.5471087694168091, "epoch": 1.89891355692017, "grad_norm": 7.004792858354732, "learning_rate": 3.893045541966975e-06, "logits": -0.8625032901763916, "logps": -73.06546020507812, "loss": 0.1666, "objective": 0.18181832134723663, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6291666626930237, "regularize": 0.18181832134723663, "step": 335 }, { "dpo_loss": 0.5468536615371704, "epoch": 1.9272555503070383, "grad_norm": 7.263082545218723, "learning_rate": 3.8516020436389945e-06, "logits": -0.8647869825363159, "logps": -74.24337005615234, "loss": 0.1658, "objective": 0.16866040229797363, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.7166666388511658, "regularize": 0.16866040229797363, "step": 340 }, { "dpo_loss": 0.5612522959709167, "epoch": 1.9555975436939064, "grad_norm": 7.17931193885588, "learning_rate": 3.8096268975436045e-06, "logits": -0.9274277091026306, "logps": -72.64501953125, "loss": 0.1631, "objective": 0.1834598332643509, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6041666865348816, "regularize": 0.1834598332643509, "step": 345 }, { "dpo_loss": 0.5486599802970886, "epoch": 1.9839395370807746, "grad_norm": 7.4574942962802675, "learning_rate": 3.767136614452458e-06, "logits": -0.8831450343132019, "logps": -75.68425750732422, "loss": 0.1664, "objective": 0.15772633254528046, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.6166666746139526, "regularize": 0.15772633254528046, "step": 350 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6841984987258911, "eval_logits": -0.9982038140296936, "eval_logps": -79.63081359863281, "eval_loss": 0.4382030665874481, "eval_objective": 0.4358615577220917, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5289255976676941, "eval_regularize": 0.4358615577220917, "eval_runtime": 258.5329, "eval_samples_per_second": 22.396, "eval_steps_per_second": 0.936, "step": 350 }, { "dpo_loss": 0.5294139981269836, "epoch": 2.012281530467643, "grad_norm": 7.097987961305298, "learning_rate": 3.724147907764478e-06, "logits": -0.8353627324104309, "logps": -75.18488311767578, "loss": 0.1556, "objective": 0.14675287902355194, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.675000011920929, "regularize": 0.14675287902355194, "step": 355 }, { "dpo_loss": 0.5278249979019165, "epoch": 2.040623523854511, "grad_norm": 7.097858061871919, "learning_rate": 3.6806776869317074e-06, "logits": -0.8662230372428894, "logps": -74.37833404541016, "loss": 0.1484, "objective": 0.13966700434684753, "ranking_idealized": 0.7250000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6916666626930237, "regularize": 0.13966700434684753, "step": 360 }, { "dpo_loss": 0.5365410447120667, "epoch": 2.0689655172413794, "grad_norm": 7.018046135257267, "learning_rate": 3.6367430508080283e-06, "logits": -0.9098676443099976, "logps": -77.11681365966797, "loss": 0.1521, "objective": 0.14031733572483063, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6541666388511658, "regularize": 0.14031733572483063, "step": 365 }, { "dpo_loss": 0.5375342965126038, "epoch": 2.0973075106282475, "grad_norm": 6.853083763524368, "learning_rate": 3.5923612809233987e-06, "logits": -0.8166040778160095, "logps": -74.46318817138672, "loss": 0.1422, "objective": 0.14259177446365356, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6499999761581421, "regularize": 0.14259177446365356, "step": 370 }, { "dpo_loss": 0.5261290669441223, "epoch": 2.1256495040151155, "grad_norm": 6.737212175634408, "learning_rate": 3.547549834686222e-06, "logits": -0.7999446392059326, "logps": -76.43958282470703, "loss": 0.1396, "objective": 0.14750143885612488, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.7124999761581421, "regularize": 0.14750143885612488, "step": 375 }, { "dpo_loss": 0.5414277911186218, "epoch": 2.153991497401984, "grad_norm": 7.256488804126431, "learning_rate": 3.5023263385165346e-06, "logits": -0.7813270092010498, "logps": -75.50519561767578, "loss": 0.1433, "objective": 0.14621533453464508, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.675000011920929, "regularize": 0.14621533453464508, "step": 380 }, { "dpo_loss": 0.5473071932792664, "epoch": 2.182333490788852, "grad_norm": 7.1971925561639445, "learning_rate": 3.4567085809127247e-06, "logits": -0.8520491123199463, "logps": -77.47991180419922, "loss": 0.1389, "objective": 0.13993406295776367, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6583333611488342, "regularize": 0.13993406295776367, "step": 385 }, { "dpo_loss": 0.538723349571228, "epoch": 2.21067548417572, "grad_norm": 7.027817565044039, "learning_rate": 3.410714505454486e-06, "logits": -0.80887770652771, "logps": -76.32317352294922, "loss": 0.1308, "objective": 0.12663429975509644, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6000000238418579, "regularize": 0.12663428485393524, "step": 390 }, { "dpo_loss": 0.5224890112876892, "epoch": 2.2390174775625886, "grad_norm": 6.949068306199585, "learning_rate": 3.364362203744777e-06, "logits": -0.8549118638038635, "logps": -76.35110473632812, "loss": 0.1437, "objective": 0.14411191642284393, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.675000011920929, "regularize": 0.14411191642284393, "step": 395 }, { "dpo_loss": 0.5168216228485107, "epoch": 2.2673594709494567, "grad_norm": 7.191883909269667, "learning_rate": 3.3176699082935546e-06, "logits": -0.9377852082252502, "logps": -77.43128204345703, "loss": 0.1368, "objective": 0.13501186668872833, "ranking_idealized": 0.7583333253860474, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.737500011920929, "regularize": 0.13501186668872833, "step": 400 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6858804225921631, "eval_logits": -1.0155284404754639, "eval_logps": -80.20379638671875, "eval_loss": 0.4407689571380615, "eval_objective": 0.43782898783683777, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5320248007774353, "eval_regularize": 0.43782898783683777, "eval_runtime": 259.3137, "eval_samples_per_second": 22.328, "eval_steps_per_second": 0.933, "step": 400 }, { "dpo_loss": 0.537171483039856, "epoch": 2.295701464336325, "grad_norm": 7.007268345576302, "learning_rate": 3.2706559853460818e-06, "logits": -1.0055091381072998, "logps": -74.74109649658203, "loss": 0.1383, "objective": 0.1392410695552826, "ranking_idealized": 0.737500011920929, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.7083333134651184, "regularize": 0.1392410695552826, "step": 405 }, { "dpo_loss": 0.5326074361801147, "epoch": 2.324043457723193, "grad_norm": 6.941825657006022, "learning_rate": 3.2233389276586325e-06, "logits": -0.8970204591751099, "logps": -74.2923355102539, "loss": 0.1297, "objective": 0.12273009866476059, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6458333134651184, "regularize": 0.12273009866476059, "step": 410 }, { "dpo_loss": 0.5270788073539734, "epoch": 2.3523854511100613, "grad_norm": 7.366817713585052, "learning_rate": 3.1757373472244324e-06, "logits": -0.8971990942955017, "logps": -74.44580078125, "loss": 0.1319, "objective": 0.14558042585849762, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6333333253860474, "regularize": 0.14558042585849762, "step": 415 }, { "dpo_loss": 0.5407485961914062, "epoch": 2.3807274444969297, "grad_norm": 7.016879855860433, "learning_rate": 3.127869967952698e-06, "logits": -0.8165015578269958, "logps": -75.92127227783203, "loss": 0.1297, "objective": 0.13260656595230103, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.6333333253860474, "regularize": 0.13260656595230103, "step": 420 }, { "dpo_loss": 0.526520848274231, "epoch": 2.409069437883798, "grad_norm": 6.605258428992394, "learning_rate": 3.0797556183036582e-06, "logits": -0.8272897601127625, "logps": -74.90123748779297, "loss": 0.1262, "objective": 0.12788838148117065, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6499999761581421, "regularize": 0.12788838148117065, "step": 425 }, { "dpo_loss": 0.5405741333961487, "epoch": 2.4374114312706663, "grad_norm": 6.6924189063588955, "learning_rate": 3.0314132238824416e-06, "logits": -0.8260743021965027, "logps": -75.06442260742188, "loss": 0.1266, "objective": 0.11811564862728119, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6416666507720947, "regularize": 0.11811564117670059, "step": 430 }, { "dpo_loss": 0.5238969922065735, "epoch": 2.4657534246575343, "grad_norm": 6.903355919273098, "learning_rate": 2.9828617999947647e-06, "logits": -0.8589097857475281, "logps": -75.69316101074219, "loss": 0.1249, "objective": 0.12512782216072083, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6708333492279053, "regularize": 0.12512782216072083, "step": 435 }, { "dpo_loss": 0.5445213317871094, "epoch": 2.4940954180444024, "grad_norm": 6.961032298685898, "learning_rate": 2.9341204441673267e-06, "logits": -0.7675038576126099, "logps": -74.55623626708984, "loss": 0.126, "objective": 0.12659841775894165, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6208333373069763, "regularize": 0.12659841775894165, "step": 440 }, { "dpo_loss": 0.5370126366615295, "epoch": 2.5224374114312704, "grad_norm": 6.94281741073779, "learning_rate": 2.8852083286358647e-06, "logits": -0.7942711710929871, "logps": -71.90986633300781, "loss": 0.1211, "objective": 0.13307242095470428, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.675000011920929, "regularize": 0.13307242095470428, "step": 445 }, { "dpo_loss": 0.5622422099113464, "epoch": 2.550779404818139, "grad_norm": 7.14501016601336, "learning_rate": 2.8361446928038298e-06, "logits": -0.8527530431747437, "logps": -74.12069702148438, "loss": 0.122, "objective": 0.13083526492118835, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5, "ranking_simple": 0.612500011920929, "regularize": 0.13083526492118835, "step": 450 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.6862838268280029, "eval_logits": -0.8946070671081543, "eval_logps": -78.42880249023438, "eval_loss": 0.44150272011756897, "eval_objective": 0.44035181403160095, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5258264541625977, "eval_regularize": 0.44035181403160095, "eval_runtime": 258.0077, "eval_samples_per_second": 22.441, "eval_steps_per_second": 0.938, "step": 450 }, { "dpo_loss": 0.5327574610710144, "epoch": 2.579121398205007, "grad_norm": 7.160178764520009, "learning_rate": 2.7869488356746344e-06, "logits": -0.8465222716331482, "logps": -74.07278442382812, "loss": 0.1216, "objective": 0.1370854526758194, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6625000238418579, "regularize": 0.1370854526758194, "step": 455 }, { "dpo_loss": 0.5250583291053772, "epoch": 2.6074633915918755, "grad_norm": 7.146508258603712, "learning_rate": 2.7376401082604563e-06, "logits": -0.8942106366157532, "logps": -74.29560852050781, "loss": 0.1165, "objective": 0.12135622650384903, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6666666865348816, "regularize": 0.12135622650384903, "step": 460 }, { "dpo_loss": 0.530348539352417, "epoch": 2.6358053849787435, "grad_norm": 7.184783585057386, "learning_rate": 2.6882379059705953e-06, "logits": -0.7349064350128174, "logps": -73.84181213378906, "loss": 0.1204, "objective": 0.1292448341846466, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6208333373069763, "regularize": 0.1292448341846466, "step": 465 }, { "dpo_loss": 0.5289201140403748, "epoch": 2.6641473783656116, "grad_norm": 7.1334661921422216, "learning_rate": 2.6387616609823506e-06, "logits": -0.7510494589805603, "logps": -73.42973327636719, "loss": 0.1121, "objective": 0.10491514950990677, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6541666388511658, "regularize": 0.10491514950990677, "step": 470 }, { "dpo_loss": 0.5439261794090271, "epoch": 2.69248937175248, "grad_norm": 7.044559545822627, "learning_rate": 2.5892308345974517e-06, "logits": -0.7502660155296326, "logps": -73.04237365722656, "loss": 0.1114, "objective": 0.11930320411920547, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6583333611488342, "regularize": 0.11930320411920547, "step": 475 }, { "dpo_loss": 0.528633177280426, "epoch": 2.720831365139348, "grad_norm": 7.216679182755541, "learning_rate": 2.53966490958702e-06, "logits": -0.832193911075592, "logps": -73.19383239746094, "loss": 0.1104, "objective": 0.10962951928377151, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6916666626930237, "regularize": 0.10962951928377151, "step": 480 }, { "dpo_loss": 0.5266162753105164, "epoch": 2.7491733585262166, "grad_norm": 6.942552990674209, "learning_rate": 2.490083382528097e-06, "logits": -0.780593752861023, "logps": -75.74951171875, "loss": 0.1126, "objective": 0.10767225921154022, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.675000011920929, "regularize": 0.10767225176095963, "step": 485 }, { "dpo_loss": 0.5204401016235352, "epoch": 2.7775153519130846, "grad_norm": 6.79511973951677, "learning_rate": 2.440505756134732e-06, "logits": -0.7421233654022217, "logps": -74.27189636230469, "loss": 0.1117, "objective": 0.11147340387105942, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.6833333373069763, "regularize": 0.11147340387105942, "step": 490 }, { "dpo_loss": 0.5330770015716553, "epoch": 2.8058573452999527, "grad_norm": 7.282976503781254, "learning_rate": 2.3909515315866606e-06, "logits": -0.7717820405960083, "logps": -72.36864471435547, "loss": 0.1034, "objective": 0.10382074862718582, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6333333253860474, "regularize": 0.10382074862718582, "step": 495 }, { "dpo_loss": 0.5220891833305359, "epoch": 2.8341993386868207, "grad_norm": 7.004605377489112, "learning_rate": 2.341440200858589e-06, "logits": -0.822429895401001, "logps": -71.28691864013672, "loss": 0.1063, "objective": 0.11509209126234055, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6666666865348816, "regularize": 0.11509209126234055, "step": 500 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6860550045967102, "eval_logits": -0.8682713508605957, "eval_logps": -78.1278076171875, "eval_loss": 0.44106075167655945, "eval_objective": 0.43840065598487854, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5299586653709412, "eval_regularize": 0.43840065598487854, "eval_runtime": 259.0969, "eval_samples_per_second": 22.347, "eval_steps_per_second": 0.934, "step": 500 }, { "dpo_loss": 0.5273423790931702, "epoch": 2.862541332073689, "grad_norm": 6.758921464396605, "learning_rate": 2.2919912390530945e-06, "logits": -0.7519776225090027, "logps": -72.76760864257812, "loss": 0.1035, "objective": 0.10023737698793411, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6833333373069763, "regularize": 0.10023736953735352, "step": 505 }, { "dpo_loss": 0.5383204817771912, "epoch": 2.8908833254605573, "grad_norm": 6.749917335321248, "learning_rate": 2.242624096740164e-06, "logits": -0.7136736512184143, "logps": -73.48322296142578, "loss": 0.1008, "objective": 0.10292276740074158, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.675000011920929, "regularize": 0.10292276740074158, "step": 510 }, { "dpo_loss": 0.5426943302154541, "epoch": 2.9192253188474258, "grad_norm": 6.80108683493094, "learning_rate": 2.193358192306384e-06, "logits": -0.844546914100647, "logps": -72.76075744628906, "loss": 0.1039, "objective": 0.10740550607442856, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.6583333611488342, "regularize": 0.10740550607442856, "step": 515 }, { "dpo_loss": 0.5363429188728333, "epoch": 2.947567312234294, "grad_norm": 7.22597227685905, "learning_rate": 2.1442129043167877e-06, "logits": -0.7738971710205078, "logps": -74.9779281616211, "loss": 0.102, "objective": 0.11110852658748627, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6458333134651184, "regularize": 0.11110852658748627, "step": 520 }, { "dpo_loss": 0.5326921343803406, "epoch": 2.975909305621162, "grad_norm": 7.020529204895981, "learning_rate": 2.0952075638923656e-06, "logits": -0.773563027381897, "logps": -73.9379653930664, "loss": 0.0967, "objective": 0.1049317866563797, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.699999988079071, "regularize": 0.1049317866563797, "step": 525 }, { "dpo_loss": 0.5151563286781311, "epoch": 3.0042512990080303, "grad_norm": 6.663633146942429, "learning_rate": 2.046361447106244e-06, "logits": -0.7806794047355652, "logps": -73.31244659423828, "loss": 0.0987, "objective": 0.10096151381731033, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6708333492279053, "regularize": 0.10096149891614914, "step": 530 }, { "dpo_loss": 0.5298264622688293, "epoch": 3.0325932923948984, "grad_norm": 6.838429513652437, "learning_rate": 1.997693767401503e-06, "logits": -0.8067893981933594, "logps": -75.07320404052734, "loss": 0.0914, "objective": 0.09071025252342224, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6708333492279053, "regularize": 0.09071025252342224, "step": 535 }, { "dpo_loss": 0.5308666825294495, "epoch": 3.0609352857817664, "grad_norm": 6.786419151482898, "learning_rate": 1.9492236680336486e-06, "logits": -0.8495451807975769, "logps": -72.48417663574219, "loss": 0.0869, "objective": 0.08104575425386429, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6458333134651184, "regularize": 0.0810457393527031, "step": 540 }, { "dpo_loss": 0.5390760898590088, "epoch": 3.089277279168635, "grad_norm": 6.715619451066239, "learning_rate": 1.9009702145406728e-06, "logits": -0.7783963084220886, "logps": -73.88996887207031, "loss": 0.0873, "objective": 0.08420184254646301, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.6208333373069763, "regularize": 0.08420184254646301, "step": 545 }, { "dpo_loss": 0.5265496373176575, "epoch": 3.117619272555503, "grad_norm": 6.799228974819442, "learning_rate": 1.852952387243698e-06, "logits": -0.6418666243553162, "logps": -73.9046401977539, "loss": 0.0878, "objective": 0.09073540568351746, "ranking_idealized": 0.699999988079071, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.7041666507720947, "regularize": 0.09073540568351746, "step": 550 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 0.6847736239433289, "eval_logits": -0.8292139172554016, "eval_logps": -77.63910675048828, "eval_loss": 0.44063544273376465, "eval_objective": 0.43784743547439575, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5330578684806824, "eval_regularize": 0.43784743547439575, "eval_runtime": 259.006, "eval_samples_per_second": 22.355, "eval_steps_per_second": 0.934, "step": 550 }, { "dpo_loss": 0.5151117444038391, "epoch": 3.1459612659423715, "grad_norm": 7.0486870886771955, "learning_rate": 1.8051890737811395e-06, "logits": -0.6810140013694763, "logps": -74.01383972167969, "loss": 0.0855, "objective": 0.08012814819812775, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6833333373069763, "regularize": 0.08012814074754715, "step": 555 }, { "dpo_loss": 0.5303942561149597, "epoch": 3.1743032593292395, "grad_norm": 6.686572075133422, "learning_rate": 1.7576990616793139e-06, "logits": -0.7435484528541565, "logps": -70.75598907470703, "loss": 0.0855, "objective": 0.07928713411092758, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6416666507720947, "regularize": 0.07928712666034698, "step": 560 }, { "dpo_loss": 0.5257388353347778, "epoch": 3.2026452527161076, "grad_norm": 7.035317567433199, "learning_rate": 1.7105010309624381e-06, "logits": -0.7857434153556824, "logps": -71.99687194824219, "loss": 0.0807, "objective": 0.07652737945318222, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6833333373069763, "regularize": 0.07652737945318222, "step": 565 }, { "dpo_loss": 0.5218066573143005, "epoch": 3.230987246102976, "grad_norm": 6.567722467858525, "learning_rate": 1.6636135468049122e-06, "logits": -0.7239015698432922, "logps": -72.04088592529297, "loss": 0.0814, "objective": 0.07984793186187744, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.6291666626930237, "regularize": 0.07984793186187744, "step": 570 }, { "dpo_loss": 0.5289373397827148, "epoch": 3.259329239489844, "grad_norm": 6.699684239267555, "learning_rate": 1.617055052228768e-06, "logits": -0.7765447497367859, "logps": -72.74471282958984, "loss": 0.0823, "objective": 0.08229862153530121, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6958333253860474, "regularize": 0.08229862153530121, "step": 575 }, { "dpo_loss": 0.5433183908462524, "epoch": 3.287671232876712, "grad_norm": 6.701807007044451, "learning_rate": 1.5708438608491816e-06, "logits": -0.7891409397125244, "logps": -73.1654052734375, "loss": 0.0794, "objective": 0.07175194472074509, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6499999761581421, "regularize": 0.0717519223690033, "step": 580 }, { "dpo_loss": 0.5400884747505188, "epoch": 3.3160132262635806, "grad_norm": 7.671988032753608, "learning_rate": 1.524998149670871e-06, "logits": -0.817208468914032, "logps": -74.3894271850586, "loss": 0.0807, "objective": 0.0763852447271347, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6958333253860474, "regularize": 0.0763852447271347, "step": 585 }, { "dpo_loss": 0.539016604423523, "epoch": 3.3443552196504487, "grad_norm": 7.045114972527912, "learning_rate": 1.479535951938243e-06, "logits": -0.8479073643684387, "logps": -74.54483795166016, "loss": 0.0782, "objective": 0.07480078190565109, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6708333492279053, "regularize": 0.07480078190565109, "step": 590 }, { "dpo_loss": 0.5274596810340881, "epoch": 3.372697213037317, "grad_norm": 6.813876249894319, "learning_rate": 1.43447515004208e-06, "logits": -0.7586421966552734, "logps": -73.8796615600586, "loss": 0.0725, "objective": 0.06939196586608887, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.06939195841550827, "step": 595 }, { "dpo_loss": 0.507723331451416, "epoch": 3.4010392064241852, "grad_norm": 6.843883025807739, "learning_rate": 1.3898334684855647e-06, "logits": -0.7954932451248169, "logps": -73.50051879882812, "loss": 0.0719, "objective": 0.0723666176199913, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6875, "regularize": 0.0723666176199913, "step": 600 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 0.6851052045822144, "eval_logits": -0.8875375390052795, "eval_logps": -77.49230194091797, "eval_loss": 0.439556360244751, "eval_objective": 0.43727535009384155, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5309917330741882, "eval_regularize": 0.43727535009384155, "eval_runtime": 260.0756, "eval_samples_per_second": 22.263, "eval_steps_per_second": 0.93, "step": 600 }, { "dpo_loss": 0.539269745349884, "epoch": 3.4293811998110533, "grad_norm": 6.836932990419662, "learning_rate": 1.3456284669124159e-06, "logits": -0.8135491013526917, "logps": -75.25065612792969, "loss": 0.072, "objective": 0.06646443903446198, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6708333492279053, "regularize": 0.06646443903446198, "step": 605 }, { "dpo_loss": 0.532633364200592, "epoch": 3.4577231931979218, "grad_norm": 6.918313902636074, "learning_rate": 1.301877533199859e-06, "logits": -0.812556266784668, "logps": -73.47212219238281, "loss": 0.0726, "objective": 0.06124640628695488, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.675000011920929, "regularize": 0.06124639883637428, "step": 610 }, { "dpo_loss": 0.522627055644989, "epoch": 3.48606518658479, "grad_norm": 6.525966688786376, "learning_rate": 1.2585978766191726e-06, "logits": -0.8209077715873718, "logps": -74.44559478759766, "loss": 0.0694, "objective": 0.0690179094672203, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.6166666746139526, "regularize": 0.0690179094672203, "step": 615 }, { "dpo_loss": 0.5149637460708618, "epoch": 3.514407179971658, "grad_norm": 6.444084091552882, "learning_rate": 1.2158065210664848e-06, "logits": -0.734274685382843, "logps": -73.22693634033203, "loss": 0.0739, "objective": 0.06696704030036926, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6333333253860474, "regularize": 0.06696703284978867, "step": 620 }, { "dpo_loss": 0.5303381085395813, "epoch": 3.5427491733585263, "grad_norm": 7.253457560302881, "learning_rate": 1.1735202983664803e-06, "logits": -0.763002336025238, "logps": -71.75556182861328, "loss": 0.0684, "objective": 0.06924500316381454, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.675000011920929, "regularize": 0.06924500316381454, "step": 625 }, { "dpo_loss": 0.5344926118850708, "epoch": 3.5710911667453944, "grad_norm": 7.160250572063496, "learning_rate": 1.1317558416516696e-06, "logits": -0.8248269557952881, "logps": -72.7044677734375, "loss": 0.0685, "objective": 0.06329541653394699, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6416666507720947, "regularize": 0.0632954090833664, "step": 630 }, { "dpo_loss": 0.5331679582595825, "epoch": 3.5994331601322624, "grad_norm": 6.5609855806488575, "learning_rate": 1.0905295788197993e-06, "logits": -0.7775312066078186, "logps": -72.57146453857422, "loss": 0.0645, "objective": 0.06953860074281693, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6208333373069763, "regularize": 0.06953860074281693, "step": 635 }, { "dpo_loss": 0.5081437826156616, "epoch": 3.627775153519131, "grad_norm": 6.931309552817511, "learning_rate": 1.049857726072005e-06, "logits": -0.7663463354110718, "logps": -74.27519989013672, "loss": 0.0641, "objective": 0.06706919521093369, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.6458333134651184, "regularize": 0.06706918776035309, "step": 640 }, { "dpo_loss": 0.5230153799057007, "epoch": 3.656117146905999, "grad_norm": 6.607222677286154, "learning_rate": 1.0097562815342215e-06, "logits": -0.7920152544975281, "logps": -71.90898132324219, "loss": 0.0644, "objective": 0.06532428413629532, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6583333611488342, "regularize": 0.06532428413629532, "step": 645 }, { "dpo_loss": 0.5418646335601807, "epoch": 3.6844591402928675, "grad_norm": 6.694188599643782, "learning_rate": 9.702410189643838e-07, "logits": -0.8044089674949646, "logps": -72.86306762695312, "loss": 0.0618, "objective": 0.05348058044910431, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6499999761581421, "regularize": 0.05348057672381401, "step": 650 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 0.6855461001396179, "eval_logits": -0.9103026986122131, "eval_logps": -77.18383026123047, "eval_loss": 0.4394647479057312, "eval_objective": 0.43862253427505493, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5268595218658447, "eval_regularize": 0.43862253427505493, "eval_runtime": 259.3939, "eval_samples_per_second": 22.321, "eval_steps_per_second": 0.933, "step": 650 }, { "dpo_loss": 0.5326651930809021, "epoch": 3.7128011336797355, "grad_norm": 6.747215671656663, "learning_rate": 9.313274815478698e-07, "logits": -0.8151116967201233, "logps": -72.32289123535156, "loss": 0.0682, "objective": 0.06774523854255676, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6666666865348816, "regularize": 0.06774523109197617, "step": 655 }, { "dpo_loss": 0.5296677947044373, "epoch": 3.7411431270666036, "grad_norm": 6.603209874734441, "learning_rate": 8.930309757836517e-07, "logits": -0.8544061779975891, "logps": -72.76691436767578, "loss": 0.0605, "objective": 0.06562329083681107, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6958333253860474, "regularize": 0.06562329083681107, "step": 660 }, { "dpo_loss": 0.5254350900650024, "epoch": 3.769485120453472, "grad_norm": 6.7420568501361, "learning_rate": 8.553665654635343e-07, "logits": -0.7711302042007446, "logps": -72.90251159667969, "loss": 0.0589, "objective": 0.05725221708416939, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6791666746139526, "regularize": 0.057252202183008194, "step": 665 }, { "dpo_loss": 0.529458224773407, "epoch": 3.79782711384034, "grad_norm": 6.596226604829734, "learning_rate": 8.183490657468687e-07, "logits": -0.874411940574646, "logps": -73.00768280029297, "loss": 0.0592, "objective": 0.0637064203619957, "ranking_idealized": 0.7333333492279053, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.7333333492279053, "regularize": 0.0637064203619957, "step": 670 }, { "dpo_loss": 0.5332936644554138, "epoch": 3.826169107227208, "grad_norm": 6.620708805759934, "learning_rate": 7.819930373330669e-07, "logits": -0.8352341055870056, "logps": -72.00785827636719, "loss": 0.0556, "objective": 0.05346338450908661, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6666666865348816, "regularize": 0.05346338450908661, "step": 675 }, { "dpo_loss": 0.5433677434921265, "epoch": 3.8545111006140766, "grad_norm": 6.502071174130889, "learning_rate": 7.463127807341966e-07, "logits": -0.7840080261230469, "logps": -72.76580047607422, "loss": 0.0571, "objective": 0.05716581270098686, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6416666507720947, "regularize": 0.057165808975696564, "step": 680 }, { "dpo_loss": 0.5243024230003357, "epoch": 3.8828530940009447, "grad_norm": 6.827039590730102, "learning_rate": 7.113223306499336e-07, "logits": -0.8465909361839294, "logps": -72.07350158691406, "loss": 0.0572, "objective": 0.053640857338905334, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6791666746139526, "regularize": 0.05364084988832474, "step": 685 }, { "dpo_loss": 0.5204752087593079, "epoch": 3.9111950873878127, "grad_norm": 6.658562071017675, "learning_rate": 6.770354504470575e-07, "logits": -0.8739193677902222, "logps": -71.3963394165039, "loss": 0.0561, "objective": 0.054595671594142914, "ranking_idealized": 0.7041666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6958333253860474, "regularize": 0.05459566414356232, "step": 690 }, { "dpo_loss": 0.5284795761108398, "epoch": 3.9395370807746812, "grad_norm": 6.480093534880764, "learning_rate": 6.434656267456843e-07, "logits": -0.8127073049545288, "logps": -72.72264862060547, "loss": 0.0567, "objective": 0.06189308688044548, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.6083333492279053, "regularize": 0.06189308688044548, "step": 695 }, { "dpo_loss": 0.5350829362869263, "epoch": 3.9678790741615493, "grad_norm": 6.7060271282278245, "learning_rate": 6.106260641143547e-07, "logits": -0.833633303642273, "logps": -73.22267150878906, "loss": 0.0551, "objective": 0.05491795390844345, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6166666746139526, "regularize": 0.05491795018315315, "step": 700 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 0.6859015226364136, "eval_logits": -0.9137452244758606, "eval_logps": -77.72093200683594, "eval_loss": 0.440186470746994, "eval_objective": 0.43875548243522644, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5289255976676941, "eval_regularize": 0.43875548243522644, "eval_runtime": 259.3718, "eval_samples_per_second": 22.323, "eval_steps_per_second": 0.933, "step": 700 }, { "dpo_loss": 0.5229139924049377, "epoch": 3.9962210675484178, "grad_norm": 6.704821856039977, "learning_rate": 5.785296798760601e-07, "logits": -0.7782571911811829, "logps": -72.21749877929688, "loss": 0.0552, "objective": 0.048626501113176346, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6666666865348816, "regularize": 0.04862649738788605, "step": 705 }, { "dpo_loss": 0.5162150263786316, "epoch": 4.024563060935286, "grad_norm": 7.292565223355315, "learning_rate": 5.471890990272666e-07, "logits": -0.814688503742218, "logps": -73.78315734863281, "loss": 0.0446, "objective": 0.045689478516578674, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.675000011920929, "regularize": 0.04568947106599808, "step": 710 }, { "dpo_loss": 0.5273666977882385, "epoch": 4.052905054322154, "grad_norm": 6.71901500064993, "learning_rate": 5.166166492719124e-07, "logits": -0.7857570648193359, "logps": -72.7701187133789, "loss": 0.0422, "objective": 0.04041726142168045, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.675000011920929, "regularize": 0.04041723534464836, "step": 715 }, { "dpo_loss": 0.5177367329597473, "epoch": 4.081247047709022, "grad_norm": 6.659794774093643, "learning_rate": 4.868243561723535e-07, "logits": -0.7260258793830872, "logps": -75.34712219238281, "loss": 0.0448, "objective": 0.041196659207344055, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6583333611488342, "regularize": 0.04119665548205376, "step": 720 }, { "dpo_loss": 0.5408446192741394, "epoch": 4.109589041095891, "grad_norm": 6.565784729673605, "learning_rate": 4.57823938419153e-07, "logits": -0.786972165107727, "logps": -72.7632064819336, "loss": 0.0445, "objective": 0.04578384384512901, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6541666388511658, "regularize": 0.045783836394548416, "step": 725 }, { "dpo_loss": 0.5255261659622192, "epoch": 4.137931034482759, "grad_norm": 6.9190144367452975, "learning_rate": 4.2962680322157335e-07, "logits": -0.8668403625488281, "logps": -72.80368041992188, "loss": 0.0469, "objective": 0.0492975153028965, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.675000011920929, "regularize": 0.04929749667644501, "step": 730 }, { "dpo_loss": 0.5357745885848999, "epoch": 4.166273027869627, "grad_norm": 6.713218850119101, "learning_rate": 4.0224404182059443e-07, "logits": -0.7645056843757629, "logps": -74.07119750976562, "loss": 0.04, "objective": 0.04291637986898422, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6791666746139526, "regularize": 0.042916372418403625, "step": 735 }, { "dpo_loss": 0.5108519196510315, "epoch": 4.194615021256495, "grad_norm": 6.802036362247038, "learning_rate": 3.756864251262143e-07, "logits": -0.8028141260147095, "logps": -73.28968048095703, "loss": 0.0412, "objective": 0.044031720608472824, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6708333492279053, "regularize": 0.04403171315789223, "step": 740 }, { "dpo_loss": 0.5228927731513977, "epoch": 4.222957014643363, "grad_norm": 6.653699462048885, "learning_rate": 3.499643994807486e-07, "logits": -0.8697967529296875, "logps": -70.57527160644531, "loss": 0.0428, "objective": 0.04283791035413742, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6416666507720947, "regularize": 0.042837902903556824, "step": 745 }, { "dpo_loss": 0.5164041519165039, "epoch": 4.251299008030231, "grad_norm": 6.911061908397223, "learning_rate": 3.250880825498026e-07, "logits": -0.9160488843917847, "logps": -72.56361389160156, "loss": 0.0388, "objective": 0.03974687308073044, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6541666388511658, "regularize": 0.03974686935544014, "step": 750 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 0.685931921005249, "eval_logits": -0.8975909352302551, "eval_logps": -77.07003021240234, "eval_loss": 0.4403546452522278, "eval_objective": 0.438612699508667, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5309917330741882, "eval_regularize": 0.438612699508667, "eval_runtime": 259.3145, "eval_samples_per_second": 22.328, "eval_steps_per_second": 0.933, "step": 750 }, { "dpo_loss": 0.5212615132331848, "epoch": 4.2796410014171, "grad_norm": 6.600495039472016, "learning_rate": 3.0106725934252095e-07, "logits": -0.8632883429527283, "logps": -71.9403076171875, "loss": 0.0401, "objective": 0.039546623826026917, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6708333492279053, "regularize": 0.03954662010073662, "step": 755 }, { "dpo_loss": 0.5251290798187256, "epoch": 4.307982994803968, "grad_norm": 6.496157144727595, "learning_rate": 2.779113783626916e-07, "logits": -0.8375190496444702, "logps": -73.24321746826172, "loss": 0.0397, "objective": 0.03997815027832985, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6958333253860474, "regularize": 0.039978139102458954, "step": 760 }, { "dpo_loss": 0.5071607828140259, "epoch": 4.336324988190836, "grad_norm": 6.910390218786043, "learning_rate": 2.5562954789221164e-07, "logits": -0.8520547747612, "logps": -73.03939056396484, "loss": 0.0405, "objective": 0.036770131438970566, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.7291666865348816, "regularize": 0.03677012771368027, "step": 765 }, { "dpo_loss": 0.509007453918457, "epoch": 4.364666981577704, "grad_norm": 6.462624726733492, "learning_rate": 2.3423053240837518e-07, "logits": -0.7884809970855713, "logps": -71.66094207763672, "loss": 0.0396, "objective": 0.03581638261675835, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6666666865348816, "regularize": 0.03581637516617775, "step": 770 }, { "dpo_loss": 0.5204900503158569, "epoch": 4.393008974964572, "grad_norm": 6.9115021717677445, "learning_rate": 2.137227491364016e-07, "logits": -0.8254011869430542, "logps": -72.81352233886719, "loss": 0.0382, "objective": 0.03518719598650932, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6583333611488342, "regularize": 0.03518717736005783, "step": 775 }, { "dpo_loss": 0.5285161733627319, "epoch": 4.42135096835144, "grad_norm": 6.553973237097607, "learning_rate": 1.941142647385469e-07, "logits": -0.843854546546936, "logps": -71.42961120605469, "loss": 0.0393, "objective": 0.03610792011022568, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6541666388511658, "regularize": 0.03610791638493538, "step": 780 }, { "dpo_loss": 0.5424137115478516, "epoch": 4.449692961738309, "grad_norm": 6.303662663640985, "learning_rate": 1.7541279214111277e-07, "logits": -0.8690065741539001, "logps": -71.59990692138672, "loss": 0.0409, "objective": 0.0452335849404335, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6416666507720947, "regularize": 0.04523357003927231, "step": 785 }, { "dpo_loss": 0.5163858532905579, "epoch": 4.478034955125177, "grad_norm": 6.732149069961477, "learning_rate": 1.5762568750059604e-07, "logits": -0.8400804400444031, "logps": -73.97518920898438, "loss": 0.0369, "objective": 0.042594779282808304, "ranking_idealized": 0.7291666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.7124999761581421, "regularize": 0.04259476438164711, "step": 790 }, { "dpo_loss": 0.523327112197876, "epoch": 4.506376948512045, "grad_norm": 6.468243868538749, "learning_rate": 1.4075994731016895e-07, "logits": -0.7410406470298767, "logps": -74.17599487304688, "loss": 0.037, "objective": 0.03256476670503616, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6499999761581421, "regularize": 0.03256473317742348, "step": 795 }, { "dpo_loss": 0.5217226147651672, "epoch": 4.534718941898913, "grad_norm": 6.810051199376682, "learning_rate": 1.2482220564763669e-07, "logits": -0.7353635430335999, "logps": -72.93279266357422, "loss": 0.0382, "objective": 0.03855961933732033, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6916666626930237, "regularize": 0.03855961188673973, "step": 800 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 0.6858941912651062, "eval_logits": -0.8972411155700684, "eval_logps": -77.24726867675781, "eval_loss": 0.4401608407497406, "eval_objective": 0.4384419023990631, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5320248007774353, "eval_regularize": 0.4384419023990631, "eval_runtime": 259.7863, "eval_samples_per_second": 22.288, "eval_steps_per_second": 0.932, "step": 800 }, { "dpo_loss": 0.5223442316055298, "epoch": 4.563060935285781, "grad_norm": 6.589373819397174, "learning_rate": 1.0981873156594381e-07, "logits": -0.8119328022003174, "logps": -71.91402435302734, "loss": 0.0371, "objective": 0.03391700237989426, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.6541666388511658, "regularize": 0.03391699120402336, "step": 805 }, { "dpo_loss": 0.5284490585327148, "epoch": 4.59140292867265, "grad_norm": 6.877467315737168, "learning_rate": 9.575542662726756e-08, "logits": -0.8636207580566406, "logps": -71.29000854492188, "loss": 0.0362, "objective": 0.034173477441072464, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.6000000238418579, "regularize": 0.034173473715782166, "step": 810 }, { "dpo_loss": 0.532176673412323, "epoch": 4.619744922059518, "grad_norm": 6.410472181963987, "learning_rate": 8.26378225816582e-08, "logits": -0.7376688122749329, "logps": -72.42508697509766, "loss": 0.0338, "objective": 0.03282972797751427, "ranking_idealized": 0.7250000238418579, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.737500011920929, "regularize": 0.03282969817519188, "step": 815 }, { "dpo_loss": 0.5355073809623718, "epoch": 4.648086915446386, "grad_norm": 6.840519860011787, "learning_rate": 7.047107919114588e-08, "logits": -0.8241658806800842, "logps": -72.70903778076172, "loss": 0.0329, "objective": 0.029066000133752823, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6875, "regularize": 0.029065988957881927, "step": 820 }, { "dpo_loss": 0.5363853573799133, "epoch": 4.6764289088332545, "grad_norm": 6.5036481206185615, "learning_rate": 5.92599822001666e-08, "logits": -0.7593883872032166, "logps": -70.98949432373047, "loss": 0.0362, "objective": 0.03307075425982475, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6041666865348816, "regularize": 0.03307074308395386, "step": 825 }, { "dpo_loss": 0.5210588574409485, "epoch": 4.7047709022201225, "grad_norm": 6.54657496412036, "learning_rate": 4.9008941453107527e-08, "logits": -0.882694661617279, "logps": -72.7275390625, "loss": 0.0382, "objective": 0.04189787432551384, "ranking_idealized": 0.675000011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6875, "regularize": 0.04189785197377205, "step": 830 }, { "dpo_loss": 0.5095342993736267, "epoch": 4.733112895606991, "grad_norm": 6.620950665910206, "learning_rate": 3.972198915970976e-08, "logits": -0.8053682446479797, "logps": -73.1785888671875, "loss": 0.0356, "objective": 0.03564568608999252, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.6416666507720947, "regularize": 0.03564564883708954, "step": 835 }, { "dpo_loss": 0.5004899501800537, "epoch": 4.7614548889938595, "grad_norm": 6.845348700884689, "learning_rate": 3.1402778309014284e-08, "logits": -0.8175690174102783, "logps": -73.01248168945312, "loss": 0.0343, "objective": 0.030798058956861496, "ranking_idealized": 0.6958333253860474, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6875, "regularize": 0.03079797886312008, "step": 840 }, { "dpo_loss": 0.5335594415664673, "epoch": 4.7897968823807275, "grad_norm": 6.869470340595315, "learning_rate": 2.4054581232470785e-08, "logits": -0.823119044303894, "logps": -72.64087677001953, "loss": 0.0337, "objective": 0.03280922397971153, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.6416666507720947, "regularize": 0.032809216529130936, "step": 845 }, { "dpo_loss": 0.5230380892753601, "epoch": 4.818138875767596, "grad_norm": 6.728592904487897, "learning_rate": 1.768028831677926e-08, "logits": -0.8133633732795715, "logps": -71.71713256835938, "loss": 0.032, "objective": 0.028577150776982307, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6833333373069763, "regularize": 0.028577139601111412, "step": 850 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 0.686019778251648, "eval_logits": -0.8982982635498047, "eval_logps": -77.20530700683594, "eval_loss": 0.440197616815567, "eval_objective": 0.4385392367839813, "eval_ranking_idealized": 0.6570248007774353, "eval_ranking_idealized_expo": 0.5113636255264282, "eval_ranking_simple": 0.5320248007774353, "eval_regularize": 0.4385392367839813, "eval_runtime": 259.2486, "eval_samples_per_second": 22.334, "eval_steps_per_second": 0.933, "step": 850 }, { "dpo_loss": 0.533964216709137, "epoch": 4.846480869154464, "grad_norm": 6.460809600085337, "learning_rate": 1.2282406866966078e-08, "logits": -0.7880451679229736, "logps": -72.03130340576172, "loss": 0.033, "objective": 0.03257838636636734, "ranking_idealized": 0.7083333134651184, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.7083333134651184, "regularize": 0.032578371465206146, "step": 855 }, { "dpo_loss": 0.5133672952651978, "epoch": 4.874822862541333, "grad_norm": 6.904438097045362, "learning_rate": 7.863060120144316e-09, "logits": -0.7785756587982178, "logps": -71.80149841308594, "loss": 0.034, "objective": 0.03182807192206383, "ranking_idealized": 0.7166666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.7124999761581421, "regularize": 0.031828057020902634, "step": 860 }, { "dpo_loss": 0.5214442014694214, "epoch": 4.903164855928201, "grad_norm": 6.578317256063193, "learning_rate": 4.423986410346526e-09, "logits": -0.8345638513565063, "logps": -70.81376647949219, "loss": 0.0332, "objective": 0.037510212510824203, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.6583333611488342, "regularize": 0.03751020506024361, "step": 865 }, { "dpo_loss": 0.5174622535705566, "epoch": 4.931506849315069, "grad_norm": 6.93105315931173, "learning_rate": 1.9665384847583622e-09, "logits": -0.8424772024154663, "logps": -72.85356903076172, "loss": 0.0344, "objective": 0.0343938022851944, "ranking_idealized": 0.6916666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6875, "regularize": 0.034393779933452606, "step": 870 }, { "dpo_loss": 0.524601399898529, "epoch": 4.959848842701937, "grad_norm": 6.733993703356916, "learning_rate": 4.916829716183901e-10, "logits": -0.790997326374054, "logps": -72.49830627441406, "loss": 0.0325, "objective": 0.03138989955186844, "ranking_idealized": 0.6833333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.675000011920929, "regularize": 0.03138989210128784, "step": 875 }, { "dpo_loss": 0.5205584764480591, "epoch": 4.988190836088805, "grad_norm": 6.678618993937897, "learning_rate": 0.0, "logits": -0.8767089247703552, "logps": -72.85419464111328, "loss": 0.0328, "objective": 0.030487608164548874, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6625000238418579, "regularize": 0.030487585812807083, "step": 880 }, { "epoch": 4.988190836088805, "step": 880, "total_flos": 0.0, "train_loss": 0.15004684163088147, "train_runtime": 35354.4588, "train_samples_per_second": 7.185, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 880, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }